Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:39:51 UTC
[08/51] [partial] Mass convert mixed tabs to spaces
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/IntBlockPool.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/IntBlockPool.cs b/src/core/Index/IntBlockPool.cs
index 5fbee30..5b589e5 100644
--- a/src/core/Index/IntBlockPool.cs
+++ b/src/core/Index/IntBlockPool.cs
@@ -19,61 +19,61 @@ using System;
namespace Lucene.Net.Index
{
-
- sealed class IntBlockPool
- {
- private void InitBlock()
- {
- intUpto = DocumentsWriter.INT_BLOCK_SIZE;
- }
-
- public int[][] buffers = new int[10][];
-
- internal int bufferUpto = - 1; // Which buffer we are upto
- public int intUpto; // Where we are in head buffer
-
- public int[] buffer; // Current head buffer
- public int intOffset = - DocumentsWriter.INT_BLOCK_SIZE; // Current head offset
-
- private DocumentsWriter docWriter;
- internal bool trackAllocations;
-
- public IntBlockPool(DocumentsWriter docWriter, bool trackAllocations)
- {
- InitBlock();
- this.docWriter = docWriter;
- this.trackAllocations = trackAllocations;
- }
-
- public void Reset()
- {
- if (bufferUpto != - 1)
- {
- if (bufferUpto > 0)
- // Recycle all but the first buffer
- docWriter.RecycleIntBlocks(buffers, 1, 1 + bufferUpto);
-
- // Reuse first buffer
- bufferUpto = 0;
- intUpto = 0;
- intOffset = 0;
- buffer = buffers[0];
- }
- }
-
- public void NextBuffer()
- {
- if (1 + bufferUpto == buffers.Length)
- {
- int[][] newBuffers = new int[(int) (buffers.Length * 1.5)][];
- Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
- buffers = newBuffers;
- }
- buffer = buffers[1 + bufferUpto] = docWriter.GetIntBlock(trackAllocations);
- bufferUpto++;
-
- intUpto = 0;
- intOffset += DocumentsWriter.INT_BLOCK_SIZE;
- }
- }
+
+ sealed class IntBlockPool
+ {
+ private void InitBlock()
+ {
+ intUpto = DocumentsWriter.INT_BLOCK_SIZE;
+ }
+
+ public int[][] buffers = new int[10][];
+
+ internal int bufferUpto = - 1; // Which buffer we are upto
+ public int intUpto; // Where we are in head buffer
+
+ public int[] buffer; // Current head buffer
+ public int intOffset = - DocumentsWriter.INT_BLOCK_SIZE; // Current head offset
+
+ private DocumentsWriter docWriter;
+ internal bool trackAllocations;
+
+ public IntBlockPool(DocumentsWriter docWriter, bool trackAllocations)
+ {
+ InitBlock();
+ this.docWriter = docWriter;
+ this.trackAllocations = trackAllocations;
+ }
+
+ public void Reset()
+ {
+ if (bufferUpto != - 1)
+ {
+ if (bufferUpto > 0)
+ // Recycle all but the first buffer
+ docWriter.RecycleIntBlocks(buffers, 1, 1 + bufferUpto);
+
+ // Reuse first buffer
+ bufferUpto = 0;
+ intUpto = 0;
+ intOffset = 0;
+ buffer = buffers[0];
+ }
+ }
+
+ public void NextBuffer()
+ {
+ if (1 + bufferUpto == buffers.Length)
+ {
+ int[][] newBuffers = new int[(int) (buffers.Length * 1.5)][];
+ Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
+ buffers = newBuffers;
+ }
+ buffer = buffers[1 + bufferUpto] = docWriter.GetIntBlock(trackAllocations);
+ bufferUpto++;
+
+ intUpto = 0;
+ intOffset += DocumentsWriter.INT_BLOCK_SIZE;
+ }
+ }
}
\ No newline at end of file
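
Note on the file above: IntBlockPool hands out fixed-size int blocks obtained from DocumentsWriter, grows its jagged buffers array by 1.5x when it runs out of slots, and on Reset() recycles every block except the first. The following is a minimal, self-contained sketch of that same pattern; the BlockSize constant and the plain new int[] allocation merely stand in for DocumentsWriter.INT_BLOCK_SIZE and DocumentsWriter.GetIntBlock/RecycleIntBlocks, which are not reproduced here.

    using System;

    // Minimal sketch of the pooling pattern used by IntBlockPool above.
    sealed class IntPoolSketch
    {
        private const int BlockSize = 8192;          // hypothetical stand-in for INT_BLOCK_SIZE

        private int[][] buffers = new int[10][];
        private int bufferUpto = -1;                 // index of the current head buffer
        public int intUpto = BlockSize;              // write position inside the head buffer
        public int intOffset = -BlockSize;           // absolute offset of the head buffer
        public int[] buffer;                         // current head buffer

        public void NextBuffer()
        {
            if (1 + bufferUpto == buffers.Length)
            {
                // Grow the jagged array by 1.5x, exactly as NextBuffer() does above.
                int[][] newBuffers = new int[(int)(buffers.Length * 1.5)][];
                Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
                buffers = newBuffers;
            }
            buffer = buffers[1 + bufferUpto] = new int[BlockSize];
            bufferUpto++;
            intUpto = 0;
            intOffset += BlockSize;
        }

        public void Reset()
        {
            if (bufferUpto == -1) return;
            // Keep only the first block and start writing into it again.
            bufferUpto = 0;
            intUpto = 0;
            intOffset = 0;
            buffer = buffers[0];
        }
    }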
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocConsumer.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/InvertedDocConsumer.cs b/src/core/Index/InvertedDocConsumer.cs
index bb9b2f8..2be2a70 100644
--- a/src/core/Index/InvertedDocConsumer.cs
+++ b/src/core/Index/InvertedDocConsumer.cs
@@ -20,34 +20,34 @@ using System.Collections.Generic;
namespace Lucene.Net.Index
{
-
- abstract class InvertedDocConsumer
- {
-
- /// <summary>Add a new thread </summary>
- internal abstract InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread);
-
- /// <summary>Abort (called after hitting AbortException) </summary>
- public abstract void Abort();
+
+ abstract class InvertedDocConsumer
+ {
+
+ /// <summary>Add a new thread </summary>
+ internal abstract InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread);
+
+ /// <summary>Abort (called after hitting AbortException) </summary>
+ public abstract void Abort();
- /// <summary>Flush a new segment </summary>
- internal abstract void Flush(
- IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields,
- SegmentWriteState state);
-
- /// <summary>Close doc stores </summary>
- internal abstract void CloseDocStore(SegmentWriteState state);
-
- /// <summary>Attempt to free RAM, returning true if any RAM was
- /// freed
- /// </summary>
- public abstract bool FreeRAM();
-
- internal FieldInfos fieldInfos;
-
- internal virtual void SetFieldInfos(FieldInfos fieldInfos)
- {
- this.fieldInfos = fieldInfos;
- }
- }
+ /// <summary>Flush a new segment </summary>
+ internal abstract void Flush(
+ IDictionary<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>> threadsAndFields,
+ SegmentWriteState state);
+
+ /// <summary>Close doc stores </summary>
+ internal abstract void CloseDocStore(SegmentWriteState state);
+
+ /// <summary>Attempt to free RAM, returning true if any RAM was
+ /// freed
+ /// </summary>
+ public abstract bool FreeRAM();
+
+ internal FieldInfos fieldInfos;
+
+ internal virtual void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocConsumerPerField.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/InvertedDocConsumerPerField.cs b/src/core/Index/InvertedDocConsumerPerField.cs
index 471d9b7..200afed 100644
--- a/src/core/Index/InvertedDocConsumerPerField.cs
+++ b/src/core/Index/InvertedDocConsumerPerField.cs
@@ -20,27 +20,27 @@ using Lucene.Net.Documents;
namespace Lucene.Net.Index
{
-
- abstract class InvertedDocConsumerPerField
- {
-
- // Called once per field, and is given all Fieldable
- // occurrences for this field in the document. Return
- // true if you wish to see inverted tokens for these
- // fields:
- internal abstract bool Start(IFieldable[] fields, int count);
-
- // Called before a field instance is being processed
- internal abstract void Start(IFieldable field);
-
- // Called once per inverted token
- internal abstract void Add();
-
- // Called once per field per document, after all Fieldable
- // occurrences are inverted
- internal abstract void Finish();
-
- // Called on hitting an aborting exception
- public abstract void Abort();
- }
+
+ abstract class InvertedDocConsumerPerField
+ {
+
+ // Called once per field, and is given all Fieldable
+ // occurrences for this field in the document. Return
+ // true if you wish to see inverted tokens for these
+ // fields:
+ internal abstract bool Start(IFieldable[] fields, int count);
+
+ // Called before a field instance is being processed
+ internal abstract void Start(IFieldable field);
+
+ // Called once per inverted token
+ internal abstract void Add();
+
+ // Called once per field per document, after all Fieldable
+ // occurrences are inverted
+ internal abstract void Finish();
+
+ // Called on hitting an aborting exception
+ public abstract void Abort();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocConsumerPerThread.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/InvertedDocConsumerPerThread.cs b/src/core/Index/InvertedDocConsumerPerThread.cs
index 49ed8df..5e0b6d1 100644
--- a/src/core/Index/InvertedDocConsumerPerThread.cs
+++ b/src/core/Index/InvertedDocConsumerPerThread.cs
@@ -19,12 +19,12 @@ using System;
namespace Lucene.Net.Index
{
-
- abstract class InvertedDocConsumerPerThread
- {
- public abstract void StartDocument();
- internal abstract InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
- public abstract DocumentsWriter.DocWriter FinishDocument();
- public abstract void Abort();
- }
+
+ abstract class InvertedDocConsumerPerThread
+ {
+ public abstract void StartDocument();
+ internal abstract InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+ public abstract DocumentsWriter.DocWriter FinishDocument();
+ public abstract void Abort();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocEndConsumer.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/InvertedDocEndConsumer.cs b/src/core/Index/InvertedDocEndConsumer.cs
index fb0a69e..f9e9548 100644
--- a/src/core/Index/InvertedDocEndConsumer.cs
+++ b/src/core/Index/InvertedDocEndConsumer.cs
@@ -20,13 +20,13 @@ using System.Collections.Generic;
namespace Lucene.Net.Index
{
-
- abstract class InvertedDocEndConsumer
- {
- public abstract InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread);
+
+ abstract class InvertedDocEndConsumer
+ {
+ public abstract InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread);
public abstract void Flush(IDictionary<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state);
- internal abstract void CloseDocStore(SegmentWriteState state);
- public abstract void Abort();
- internal abstract void SetFieldInfos(FieldInfos fieldInfos);
- }
+ internal abstract void CloseDocStore(SegmentWriteState state);
+ public abstract void Abort();
+ internal abstract void SetFieldInfos(FieldInfos fieldInfos);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocEndConsumerPerField.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/InvertedDocEndConsumerPerField.cs b/src/core/Index/InvertedDocEndConsumerPerField.cs
index dfad1c9..2e82ad4 100644
--- a/src/core/Index/InvertedDocEndConsumerPerField.cs
+++ b/src/core/Index/InvertedDocEndConsumerPerField.cs
@@ -19,10 +19,10 @@ using System;
namespace Lucene.Net.Index
{
-
- abstract class InvertedDocEndConsumerPerField
- {
- internal abstract void Finish();
- internal abstract void Abort();
- }
+
+ abstract class InvertedDocEndConsumerPerField
+ {
+ internal abstract void Finish();
+ internal abstract void Abort();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/InvertedDocEndConsumerPerThread.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/InvertedDocEndConsumerPerThread.cs b/src/core/Index/InvertedDocEndConsumerPerThread.cs
index 2f4fb5c..4721566 100644
--- a/src/core/Index/InvertedDocEndConsumerPerThread.cs
+++ b/src/core/Index/InvertedDocEndConsumerPerThread.cs
@@ -19,12 +19,12 @@ using System;
namespace Lucene.Net.Index
{
-
- abstract class InvertedDocEndConsumerPerThread
- {
- internal abstract void StartDocument();
- internal abstract InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
- internal abstract void FinishDocument();
- internal abstract void Abort();
- }
+
+ abstract class InvertedDocEndConsumerPerThread
+ {
+ internal abstract void StartDocument();
+ internal abstract InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+ internal abstract void FinishDocument();
+ internal abstract void Abort();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs b/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs
index 3775de1..7cb928b 100644
--- a/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs
+++ b/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs
@@ -19,33 +19,33 @@ using System.Collections.Generic;
namespace Lucene.Net.Index
{
-
- /// <summary> This <see cref="IndexDeletionPolicy" /> implementation that
- /// keeps only the most recent commit and immediately removes
- /// all prior commits after a new commit is done. This is
- /// the default deletion policy.
- /// </summary>
-
- public sealed class KeepOnlyLastCommitDeletionPolicy : IndexDeletionPolicy
- {
-
- /// <summary> Deletes all commits except the most recent one.</summary>
- public void OnInit<T>(IList<T> commits) where T : IndexCommit
- {
- // Note that commits.size() should normally be 1:
- OnCommit(commits);
- }
-
- /// <summary> Deletes all commits except the most recent one.</summary>
- public void OnCommit<T>(IList<T> commits) where T : IndexCommit
- {
- // Note that commits.size() should normally be 2 (if not
- // called by onInit above):
- int size = commits.Count;
- for (int i = 0; i < size - 1; i++)
- {
- commits[i].Delete();
- }
- }
- }
+
+ /// <summary> This <see cref="IndexDeletionPolicy" /> implementation that
+ /// keeps only the most recent commit and immediately removes
+ /// all prior commits after a new commit is done. This is
+ /// the default deletion policy.
+ /// </summary>
+
+ public sealed class KeepOnlyLastCommitDeletionPolicy : IndexDeletionPolicy
+ {
+
+ /// <summary> Deletes all commits except the most recent one.</summary>
+ public void OnInit<T>(IList<T> commits) where T : IndexCommit
+ {
+ // Note that commits.size() should normally be 1:
+ OnCommit(commits);
+ }
+
+ /// <summary> Deletes all commits except the most recent one.</summary>
+ public void OnCommit<T>(IList<T> commits) where T : IndexCommit
+ {
+ // Note that commits.size() should normally be 2 (if not
+ // called by onInit above):
+ int size = commits.Count;
+ for (int i = 0; i < size - 1; i++)
+ {
+ commits[i].Delete();
+ }
+ }
+ }
}
\ No newline at end of file
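
Note on the file above: KeepOnlyLastCommitDeletionPolicy is already the default deletion policy, so most writers never pass it explicitly. A minimal usage sketch, assuming the 3.x IndexWriter constructor overload that accepts an IndexDeletionPolicy, and using an in-memory RAMDirectory so the snippet is self-contained:

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using Version = Lucene.Net.Util.Version;

    class DeletionPolicyDemo
    {
        static void Main()
        {
            // Passing the policy explicitly is redundant (it is the default),
            // but shows where a custom IndexDeletionPolicy would be plugged in.
            var dir = new RAMDirectory();
            var analyzer = new StandardAnalyzer(Version.LUCENE_30);
            using (var writer = new IndexWriter(dir, analyzer,
                       new KeepOnlyLastCommitDeletionPolicy(),
                       IndexWriter.MaxFieldLength.UNLIMITED))
            {
                writer.Commit();   // each commit immediately removes the prior one
            }
        }
    }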
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/LogByteSizeMergePolicy.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/LogByteSizeMergePolicy.cs b/src/core/Index/LogByteSizeMergePolicy.cs
index 5d5c952..5f1b13d 100644
--- a/src/core/Index/LogByteSizeMergePolicy.cs
+++ b/src/core/Index/LogByteSizeMergePolicy.cs
@@ -19,34 +19,34 @@ using System;
namespace Lucene.Net.Index
{
-
- /// <summary>This is a <see cref="LogMergePolicy" /> that measures size of a
- /// segment as the total byte size of the segment's files.
- /// </summary>
- public class LogByteSizeMergePolicy : LogMergePolicy
- {
-
- /// <seealso cref="MinMergeMB">
- /// </seealso>
- public const double DEFAULT_MIN_MERGE_MB = 1.6;
-
- /// <summary>Default maximum segment size. A segment of this size</summary>
- /// <seealso cref="MaxMergeMB">
- /// </seealso>
- public static readonly long DEFAULT_MAX_MERGE_MB = long.MaxValue;
-
- public LogByteSizeMergePolicy(IndexWriter writer)
+
+ /// <summary>This is a <see cref="LogMergePolicy" /> that measures size of a
+ /// segment as the total byte size of the segment's files.
+ /// </summary>
+ public class LogByteSizeMergePolicy : LogMergePolicy
+ {
+
+ /// <seealso cref="MinMergeMB">
+ /// </seealso>
+ public const double DEFAULT_MIN_MERGE_MB = 1.6;
+
+ /// <summary>Default maximum segment size. A segment of this size</summary>
+ /// <seealso cref="MaxMergeMB">
+ /// </seealso>
+ public static readonly long DEFAULT_MAX_MERGE_MB = long.MaxValue;
+
+ public LogByteSizeMergePolicy(IndexWriter writer)
: base(writer)
- {
- minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024);
+ {
+ minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024);
//mgarski - the line below causes an overflow in .NET, resulting in a negative number...
- //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024);
+ //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024);
maxMergeSize = DEFAULT_MAX_MERGE_MB;
- }
- protected internal override long Size(SegmentInfo info)
- {
- return SizeBytes(info);
- }
+ }
+ protected internal override long Size(SegmentInfo info)
+ {
+ return SizeBytes(info);
+ }
protected override void Dispose(bool disposing)
{
@@ -54,46 +54,46 @@ namespace Lucene.Net.Index
}
- /// <summary><p/>Gets or sets the largest segment (measured by total
- /// byte size of the segment's files, in MB) that may be
- /// merged with other segments. Small values (e.g., less
- /// than 50 MB) are best for interactive indexing, as this
- /// limits the length of pauses while indexing to a few
- /// seconds. Larger values are best for batched indexing
- /// and speedier searches.<p/>
- ///
- /// <p/>Note that <see cref="IndexWriter.MaxMergeDocs" /> is also
- /// used to check whether a segment is too large for
- /// merging (it's either or).<p/>
- /// </summary>
- public virtual double MaxMergeMB
- {
- get { return maxMergeSize/1024d/1024d; }
- set
- {
- //mgarski: java gracefully overflows to Int64.MaxValue, .NET to MinValue...
- maxMergeSize = (long) (value*1024*1024);
- if (maxMergeSize < 0)
- {
- maxMergeSize = DEFAULT_MAX_MERGE_MB;
- }
- }
- }
+ /// <summary><p/>Gets or sets the largest segment (measured by total
+ /// byte size of the segment's files, in MB) that may be
+ /// merged with other segments. Small values (e.g., less
+ /// than 50 MB) are best for interactive indexing, as this
+ /// limits the length of pauses while indexing to a few
+ /// seconds. Larger values are best for batched indexing
+ /// and speedier searches.<p/>
+ ///
+ /// <p/>Note that <see cref="IndexWriter.MaxMergeDocs" /> is also
+ /// used to check whether a segment is too large for
+ /// merging (it's either or).<p/>
+ /// </summary>
+ public virtual double MaxMergeMB
+ {
+ get { return maxMergeSize/1024d/1024d; }
+ set
+ {
+ //mgarski: java gracefully overflows to Int64.MaxValue, .NET to MinValue...
+ maxMergeSize = (long) (value*1024*1024);
+ if (maxMergeSize < 0)
+ {
+ maxMergeSize = DEFAULT_MAX_MERGE_MB;
+ }
+ }
+ }
- /// <summary>Gets or sets the minimum size for the lowest level segments.
- /// Any segments below this size are considered to be on
- /// the same level (even if they vary drastically in size)
- /// and will be merged whenever there are mergeFactor of
- /// them. This effectively truncates the "long tail" of
- /// small segments that would otherwise be created into a
- /// single level. If you set this too large, it could
- /// greatly increase the merging cost during indexing (if
- /// you flush many small segments).
- /// </summary>
- public virtual double MinMergeMB
- {
- get { return ((double) minMergeSize)/1024/1024; }
- set { minMergeSize = (long) (value*1024*1024); }
- }
- }
+ /// <summary>Gets or sets the minimum size for the lowest level segments.
+ /// Any segments below this size are considered to be on
+ /// the same level (even if they vary drastically in size)
+ /// and will be merged whenever there are mergeFactor of
+ /// them. This effectively truncates the "long tail" of
+ /// small segments that would otherwise be created into a
+ /// single level. If you set this too large, it could
+ /// greatly increase the merging cost during indexing (if
+ /// you flush many small segments).
+ /// </summary>
+ public virtual double MinMergeMB
+ {
+ get { return ((double) minMergeSize)/1024/1024; }
+ set { minMergeSize = (long) (value*1024*1024); }
+ }
+ }
}
\ No newline at end of file
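
Note on the file above: the mgarski comments exist because DEFAULT_MAX_MERGE_MB * 1024 * 1024 overflows Int64 in .NET, so the MaxMergeMB setter clamps a negative result back to DEFAULT_MAX_MERGE_MB rather than wrapping to MinValue as Java does. A minimal configuration sketch using the two properties from the diff, assuming an already-constructed IndexWriter and its SetMergePolicy method:

    using Lucene.Net.Index;

    static class ByteSizePolicyDemo
    {
        // "writer" is assumed to be created elsewhere; shown only to illustrate the properties.
        internal static void Configure(IndexWriter writer)
        {
            var policy = new LogByteSizeMergePolicy(writer)
            {
                MinMergeMB = 1.6,     // DEFAULT_MIN_MERGE_MB: smaller segments share the lowest level
                MaxMergeMB = 2048.0   // segments larger than this are never merged further
            };
            writer.SetMergePolicy(policy);
        }
    }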
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/LogDocMergePolicy.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/LogDocMergePolicy.cs b/src/core/Index/LogDocMergePolicy.cs
index 55ee407..610b890 100644
--- a/src/core/Index/LogDocMergePolicy.cs
+++ b/src/core/Index/LogDocMergePolicy.cs
@@ -19,51 +19,51 @@ using System;
namespace Lucene.Net.Index
{
-
- /// <summary>This is a <see cref="LogMergePolicy" /> that measures size of a
- /// segment as the number of documents (not taking deletions
- /// into account).
- /// </summary>
-
- public class LogDocMergePolicy : LogMergePolicy
- {
-
- /// <seealso cref="MinMergeDocs">
- /// </seealso>
- public const int DEFAULT_MIN_MERGE_DOCS = 1000;
-
- public LogDocMergePolicy(IndexWriter writer):base(writer)
- {
- minMergeSize = DEFAULT_MIN_MERGE_DOCS;
-
- // maxMergeSize is never used by LogDocMergePolicy; set
- // it to Long.MAX_VALUE to disable it
- maxMergeSize = System.Int64.MaxValue;
- }
- protected internal override long Size(SegmentInfo info)
- {
- return SizeDocs(info);
- }
+
+ /// <summary>This is a <see cref="LogMergePolicy" /> that measures size of a
+ /// segment as the number of documents (not taking deletions
+ /// into account).
+ /// </summary>
+
+ public class LogDocMergePolicy : LogMergePolicy
+ {
+
+ /// <seealso cref="MinMergeDocs">
+ /// </seealso>
+ public const int DEFAULT_MIN_MERGE_DOCS = 1000;
+
+ public LogDocMergePolicy(IndexWriter writer):base(writer)
+ {
+ minMergeSize = DEFAULT_MIN_MERGE_DOCS;
+
+ // maxMergeSize is never used by LogDocMergePolicy; set
+ // it to Long.MAX_VALUE to disable it
+ maxMergeSize = System.Int64.MaxValue;
+ }
+ protected internal override long Size(SegmentInfo info)
+ {
+ return SizeDocs(info);
+ }
- protected override void Dispose(bool disposing)
+ protected override void Dispose(bool disposing)
{
// Do nothing.
}
- /// <summary>Gets or sets the minimum size for the lowest level segments.
- /// Any segments below this size are considered to be on
- /// the same level (even if they vary drastically in size)
- /// and will be merged whenever there are mergeFactor of
- /// them. This effectively truncates the "long tail" of
- /// small segments that would otherwise be created into a
- /// single level. If you set this too large, it could
- /// greatly increase the merging cost during indexing (if
- /// you flush many small segments).
- /// </summary>
- public virtual int MinMergeDocs
- {
- get { return (int) minMergeSize; }
- set { minMergeSize = value; }
- }
- }
+ /// <summary>Gets or sets the minimum size for the lowest level segments.
+ /// Any segments below this size are considered to be on
+ /// the same level (even if they vary drastically in size)
+ /// and will be merged whenever there are mergeFactor of
+ /// them. This effectively truncates the "long tail" of
+ /// small segments that would otherwise be created into a
+ /// single level. If you set this too large, it could
+ /// greatly increase the merging cost during indexing (if
+ /// you flush many small segments).
+ /// </summary>
+ public virtual int MinMergeDocs
+ {
+ get { return (int) minMergeSize; }
+ set { minMergeSize = value; }
+ }
+ }
}
\ No newline at end of file
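
Note on the file above: LogDocMergePolicy is the document-count counterpart; its constructor pins maxMergeSize to Int64.MaxValue, so only MinMergeDocs (plus the inherited MergeFactor and MaxMergeDocs) matters. A sketch along the same lines as the previous one, again assuming an existing writer:

    using Lucene.Net.Index;

    static class DocCountPolicyDemo
    {
        internal static void Configure(IndexWriter writer)
        {
            var policy = new LogDocMergePolicy(writer)
            {
                MinMergeDocs = 1000,   // DEFAULT_MIN_MERGE_DOCS
                MergeFactor = 10       // inherited from LogMergePolicy
            };
            writer.SetMergePolicy(policy);
        }
    }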
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/LogMergePolicy.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/LogMergePolicy.cs b/src/core/Index/LogMergePolicy.cs
index c087835..5c65c92 100644
--- a/src/core/Index/LogMergePolicy.cs
+++ b/src/core/Index/LogMergePolicy.cs
@@ -20,508 +20,508 @@ using System.Collections.Generic;
namespace Lucene.Net.Index
{
-
- /// <summary><p/>This class implements a <see cref="MergePolicy" /> that tries
- /// to merge segments into levels of exponentially
- /// increasing size, where each level has fewer segments than
- /// the value of the merge factor. Whenever extra segments
- /// (beyond the merge factor upper bound) are encountered,
- /// all segments within the level are merged. You can get or
- /// set the merge factor using <see cref="MergeFactor" /> and
- /// <see cref="MergeFactor" /> respectively.<p/>
- ///
- /// <p/>This class is abstract and requires a subclass to
- /// define the <see cref="Size" /> method which specifies how a
- /// segment's size is determined. <see cref="LogDocMergePolicy" />
- /// is one subclass that measures size by document count in
- /// the segment. <see cref="LogByteSizeMergePolicy" /> is another
- /// subclass that measures size as the total byte size of the
- /// file(s) for the segment.<p/>
- /// </summary>
-
- public abstract class LogMergePolicy : MergePolicy
- {
-
- /// <summary>Defines the allowed range of log(size) for each
- /// level. A level is computed by taking the max segment
- /// log size, minus LEVEL_LOG_SPAN, and finding all
- /// segments falling within that range.
- /// </summary>
- public const double LEVEL_LOG_SPAN = 0.75;
-
- /// <summary>Default merge factor, which is how many segments are
- /// merged at a time
- /// </summary>
- public const int DEFAULT_MERGE_FACTOR = 10;
-
- /// <summary>Default maximum segment size. A segment of this size</summary>
- /// <seealso cref="MaxMergeDocs">
- /// </seealso>
- public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
+
+ /// <summary><p/>This class implements a <see cref="MergePolicy" /> that tries
+ /// to merge segments into levels of exponentially
+ /// increasing size, where each level has fewer segments than
+ /// the value of the merge factor. Whenever extra segments
+ /// (beyond the merge factor upper bound) are encountered,
+ /// all segments within the level are merged. You can get or
+ /// set the merge factor using <see cref="MergeFactor" /> and
+ /// <see cref="MergeFactor" /> respectively.<p/>
+ ///
+ /// <p/>This class is abstract and requires a subclass to
+ /// define the <see cref="Size" /> method which specifies how a
+ /// segment's size is determined. <see cref="LogDocMergePolicy" />
+ /// is one subclass that measures size by document count in
+ /// the segment. <see cref="LogByteSizeMergePolicy" /> is another
+ /// subclass that measures size as the total byte size of the
+ /// file(s) for the segment.<p/>
+ /// </summary>
+
+ public abstract class LogMergePolicy : MergePolicy
+ {
+
+ /// <summary>Defines the allowed range of log(size) for each
+ /// level. A level is computed by taking the max segment
+ /// log size, minus LEVEL_LOG_SPAN, and finding all
+ /// segments falling within that range.
+ /// </summary>
+ public const double LEVEL_LOG_SPAN = 0.75;
+
+ /// <summary>Default merge factor, which is how many segments are
+ /// merged at a time
+ /// </summary>
+ public const int DEFAULT_MERGE_FACTOR = 10;
+
+ /// <summary>Default maximum segment size. A segment of this size</summary>
+ /// <seealso cref="MaxMergeDocs">
+ /// </seealso>
+ public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
/// <summary> Default noCFSRatio. If a merge's size is >= 10% of
/// the index, then we disable compound file for it.
/// See <see cref="NoCFSRatio"/>
/// </summary>
public static double DEFAULT_NO_CFS_RATIO = 0.1;
-
- private int mergeFactor = DEFAULT_MERGE_FACTOR;
-
- internal long minMergeSize;
- internal long maxMergeSize;
- internal int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+
+ private int mergeFactor = DEFAULT_MERGE_FACTOR;
+
+ internal long minMergeSize;
+ internal long maxMergeSize;
+ internal int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
protected double internalNoCFSRatio = DEFAULT_NO_CFS_RATIO;
-
- /* TODO 3.0: change this default to true */
- protected internal bool internalCalibrateSizeByDeletes = true;
-
- private bool useCompoundFile = true;
- private bool useCompoundDocStore = true;
+
+ /* TODO 3.0: change this default to true */
+ protected internal bool internalCalibrateSizeByDeletes = true;
+
+ private bool useCompoundFile = true;
+ private bool useCompoundDocStore = true;
- protected LogMergePolicy(IndexWriter writer):base(writer)
- {
- }
-
- protected internal virtual bool Verbose()
- {
- return writer != null && writer.Verbose;
- }
+ protected LogMergePolicy(IndexWriter writer):base(writer)
+ {
+ }
+
+ protected internal virtual bool Verbose()
+ {
+ return writer != null && writer.Verbose;
+ }
- public double NoCFSRatio
- {
- get { return internalNoCFSRatio; }
- set
- {
- if (value < 0.0 || value > 1.0)
- {
- throw new ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value);
- }
- this.internalNoCFSRatio = value;
- }
- }
+ public double NoCFSRatio
+ {
+ get { return internalNoCFSRatio; }
+ set
+ {
+ if (value < 0.0 || value > 1.0)
+ {
+ throw new ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value);
+ }
+ this.internalNoCFSRatio = value;
+ }
+ }
- /* If a merged segment will be more than this percentage
+ /* If a merged segment will be more than this percentage
* of the total size of the index, leave the segment as
* non-compound file even if compound file is enabled.
* Set to 1.0 to always use CFS regardless of merge
* size. */
- private void Message(System.String message)
- {
- if (Verbose())
- writer.Message("LMP: " + message);
- }
+ private void Message(System.String message)
+ {
+ if (Verbose())
+ writer.Message("LMP: " + message);
+ }
- /// <summary>Gets or sets how often segment indices are merged by
- /// addDocument(). With smaller values, less RAM is used
- /// while indexing, and searches on unoptimized indices are
- /// faster, but indexing speed is slower. With larger
- /// values, more RAM is used during indexing, and while
- /// searches on unoptimized indices are slower, indexing is
- /// faster. Thus larger values (> 10) are best for batch
- /// index creation, and smaller values (< 10) for indices
- /// that are interactively maintained.
- /// </summary>
- public virtual int MergeFactor
- {
- get { return mergeFactor; }
- set
- {
- if (value < 2)
- throw new System.ArgumentException("mergeFactor cannot be less than 2");
- this.mergeFactor = value;
- }
- }
+ /// <summary>Gets or sets how often segment indices are merged by
+ /// addDocument(). With smaller values, less RAM is used
+ /// while indexing, and searches on unoptimized indices are
+ /// faster, but indexing speed is slower. With larger
+ /// values, more RAM is used during indexing, and while
+ /// searches on unoptimized indices are slower, indexing is
+ /// faster. Thus larger values (> 10) are best for batch
+ /// index creation, and smaller values (< 10) for indices
+ /// that are interactively maintained.
+ /// </summary>
+ public virtual int MergeFactor
+ {
+ get { return mergeFactor; }
+ set
+ {
+ if (value < 2)
+ throw new System.ArgumentException("mergeFactor cannot be less than 2");
+ this.mergeFactor = value;
+ }
+ }
- public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
- {
- return useCompoundFile;
- }
-
- /// <summary>Gets or sets whether compound file format should be used for
- /// newly flushed and newly merged segments.
- /// </summary>
- public virtual void SetUseCompoundFile(bool useCompoundFile)
- {
- this.useCompoundFile = useCompoundFile;
- }
+ public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info)
+ {
+ return useCompoundFile;
+ }
+
+ /// <summary>Gets or sets whether compound file format should be used for
+ /// newly flushed and newly merged segments.
+ /// </summary>
+ public virtual void SetUseCompoundFile(bool useCompoundFile)
+ {
+ this.useCompoundFile = useCompoundFile;
+ }
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public virtual bool GetUseCompoundFile()
- {
- return useCompoundFile;
- }
-
- // Javadoc inherited
- public override bool UseCompoundDocStore(SegmentInfos infos)
- {
- return useCompoundDocStore;
- }
-
- /// <summary>Sets whether compound file format should be used for
- /// newly flushed and newly merged doc store
- /// segment files (term vectors and stored fields).
- /// </summary>
- public virtual void SetUseCompoundDocStore(bool useCompoundDocStore)
- {
- this.useCompoundDocStore = useCompoundDocStore;
- }
-
- /// <summary>Returns true if newly flushed and newly merge doc
- /// store segment files (term vectors and stored fields)
- /// </summary>
+ {
+ return useCompoundFile;
+ }
+
+ // Javadoc inherited
+ public override bool UseCompoundDocStore(SegmentInfos infos)
+ {
+ return useCompoundDocStore;
+ }
+
+ /// <summary>Sets whether compound file format should be used for
+ /// newly flushed and newly merged doc store
+ /// segment files (term vectors and stored fields).
+ /// </summary>
+ public virtual void SetUseCompoundDocStore(bool useCompoundDocStore)
+ {
+ this.useCompoundDocStore = useCompoundDocStore;
+ }
+
+ /// <summary>Returns true if newly flushed and newly merge doc
+ /// store segment files (term vectors and stored fields)
+ /// </summary>
/// <seealso cref="SetUseCompoundDocStore ">
- /// </seealso>
+ /// </seealso>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
public virtual bool GetUseCompoundDocStore()
- {
- return useCompoundDocStore;
- }
+ {
+ return useCompoundDocStore;
+ }
- /// <summary>Gets or sets whether the segment size should be calibrated by
- /// the number of deletes when choosing segments for merge.
- /// </summary>
- public virtual bool CalibrateSizeByDeletes
- {
- set { this.internalCalibrateSizeByDeletes = value; }
- get { return internalCalibrateSizeByDeletes; }
- }
+ /// <summary>Gets or sets whether the segment size should be calibrated by
+ /// the number of deletes when choosing segments for merge.
+ /// </summary>
+ public virtual bool CalibrateSizeByDeletes
+ {
+ set { this.internalCalibrateSizeByDeletes = value; }
+ get { return internalCalibrateSizeByDeletes; }
+ }
- abstract protected internal long Size(SegmentInfo info);
-
- protected internal virtual long SizeDocs(SegmentInfo info)
- {
- if (internalCalibrateSizeByDeletes)
- {
- int delCount = writer.NumDeletedDocs(info);
- return (info.docCount - (long) delCount);
- }
- else
- {
- return info.docCount;
- }
- }
-
- protected internal virtual long SizeBytes(SegmentInfo info)
- {
- long byteSize = info.SizeInBytes();
- if (internalCalibrateSizeByDeletes)
- {
- int delCount = writer.NumDeletedDocs(info);
- float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount));
- return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio)));
- }
- else
- {
- return byteSize;
- }
- }
-
- private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
- {
- int numSegments = infos.Count;
- int numToOptimize = 0;
- SegmentInfo optimizeInfo = null;
- for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
- {
- SegmentInfo info = infos.Info(i);
- if (segmentsToOptimize.Contains(info))
- {
- numToOptimize++;
- optimizeInfo = info;
- }
- }
-
- return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
- }
-
- /// <summary>Returns true if this single info is optimized (has no
- /// pending norms or deletes, is in the same dir as the
- /// writer, and matches the current compound file setting
- /// </summary>
- private bool IsOptimized(SegmentInfo info)
- {
- bool hasDeletions = writer.NumDeletedDocs(info) > 0;
- return !hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory &&
+ abstract protected internal long Size(SegmentInfo info);
+
+ protected internal virtual long SizeDocs(SegmentInfo info)
+ {
+ if (internalCalibrateSizeByDeletes)
+ {
+ int delCount = writer.NumDeletedDocs(info);
+ return (info.docCount - (long) delCount);
+ }
+ else
+ {
+ return info.docCount;
+ }
+ }
+
+ protected internal virtual long SizeBytes(SegmentInfo info)
+ {
+ long byteSize = info.SizeInBytes();
+ if (internalCalibrateSizeByDeletes)
+ {
+ int delCount = writer.NumDeletedDocs(info);
+ float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount));
+ return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio)));
+ }
+ else
+ {
+ return byteSize;
+ }
+ }
+
+ private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
+ {
+ int numSegments = infos.Count;
+ int numToOptimize = 0;
+ SegmentInfo optimizeInfo = null;
+ for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++)
+ {
+ SegmentInfo info = infos.Info(i);
+ if (segmentsToOptimize.Contains(info))
+ {
+ numToOptimize++;
+ optimizeInfo = info;
+ }
+ }
+
+ return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo));
+ }
+
+ /// <summary>Returns true if this single info is optimized (has no
+ /// pending norms or deletes, is in the same dir as the
+ /// writer, and matches the current compound file setting
+ /// </summary>
+ private bool IsOptimized(SegmentInfo info)
+ {
+ bool hasDeletions = writer.NumDeletedDocs(info) > 0;
+ return !hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory &&
(info.GetUseCompoundFile() == useCompoundFile || internalNoCFSRatio < 1.0);
- }
-
- /// <summary>Returns the merges necessary to optimize the index.
- /// This merge policy defines "optimized" to mean only one
- /// segment in the index, where that segment has no
- /// deletions pending nor separate norms, and it is in
- /// compound file format if the current useCompoundFile
- /// setting is true. This method returns multiple merges
- /// (mergeFactor at a time) so the <see cref="MergeScheduler" />
- /// in use may make use of concurrency.
- /// </summary>
- public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
- {
- MergeSpecification spec;
-
- System.Diagnostics.Debug.Assert(maxNumSegments > 0);
-
- if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
- {
-
- // Find the newest (rightmost) segment that needs to
- // be optimized (other segments may have been flushed
- // since optimize started):
- int last = infos.Count;
- while (last > 0)
- {
- SegmentInfo info = infos.Info(--last);
- if (segmentsToOptimize.Contains(info))
- {
- last++;
- break;
- }
- }
-
- if (last > 0)
- {
-
- spec = new MergeSpecification();
-
- // First, enroll all "full" merges (size
- // mergeFactor) to potentially be run concurrently:
- while (last - maxNumSegments + 1 >= mergeFactor)
- {
+ }
+
+ /// <summary>Returns the merges necessary to optimize the index.
+ /// This merge policy defines "optimized" to mean only one
+ /// segment in the index, where that segment has no
+ /// deletions pending nor separate norms, and it is in
+ /// compound file format if the current useCompoundFile
+ /// setting is true. This method returns multiple merges
+ /// (mergeFactor at a time) so the <see cref="MergeScheduler" />
+ /// in use may make use of concurrency.
+ /// </summary>
+ public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet<SegmentInfo> segmentsToOptimize)
+ {
+ MergeSpecification spec;
+
+ System.Diagnostics.Debug.Assert(maxNumSegments > 0);
+
+ if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize))
+ {
+
+ // Find the newest (rightmost) segment that needs to
+ // be optimized (other segments may have been flushed
+ // since optimize started):
+ int last = infos.Count;
+ while (last > 0)
+ {
+ SegmentInfo info = infos.Info(--last);
+ if (segmentsToOptimize.Contains(info))
+ {
+ last++;
+ break;
+ }
+ }
+
+ if (last > 0)
+ {
+
+ spec = new MergeSpecification();
+
+ // First, enroll all "full" merges (size
+ // mergeFactor) to potentially be run concurrently:
+ while (last - maxNumSegments + 1 >= mergeFactor)
+ {
spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last)));
- last -= mergeFactor;
- }
-
- // Only if there are no full merges pending do we
- // add a final partial (< mergeFactor segments) merge:
- if (0 == spec.merges.Count)
- {
- if (maxNumSegments == 1)
- {
-
- // Since we must optimize down to 1 segment, the
- // choice is simple:
- if (last > 1 || !IsOptimized(infos.Info(0)))
+ last -= mergeFactor;
+ }
+
+ // Only if there are no full merges pending do we
+ // add a final partial (< mergeFactor segments) merge:
+ if (0 == spec.merges.Count)
+ {
+ if (maxNumSegments == 1)
+ {
+
+ // Since we must optimize down to 1 segment, the
+ // choice is simple:
+ if (last > 1 || !IsOptimized(infos.Info(0)))
spec.Add(MakeOneMerge(infos, infos.Range(0, last)));
- }
- else if (last > maxNumSegments)
- {
-
- // Take care to pick a partial merge that is
- // least cost, but does not make the index too
- // lopsided. If we always just picked the
- // partial tail then we could produce a highly
- // lopsided index over time:
-
- // We must merge this many segments to leave
- // maxNumSegments in the index (from when
- // optimize was first kicked off):
- int finalMergeSize = last - maxNumSegments + 1;
-
- // Consider all possible starting points:
- long bestSize = 0;
- int bestStart = 0;
-
- for (int i = 0; i < last - finalMergeSize + 1; i++)
- {
- long sumSize = 0;
- for (int j = 0; j < finalMergeSize; j++)
- sumSize += Size(infos.Info(j + i));
- if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
- {
- bestStart = i;
- bestSize = sumSize;
- }
- }
+ }
+ else if (last > maxNumSegments)
+ {
+
+ // Take care to pick a partial merge that is
+ // least cost, but does not make the index too
+ // lopsided. If we always just picked the
+ // partial tail then we could produce a highly
+ // lopsided index over time:
+
+ // We must merge this many segments to leave
+ // maxNumSegments in the index (from when
+ // optimize was first kicked off):
+ int finalMergeSize = last - maxNumSegments + 1;
+
+ // Consider all possible starting points:
+ long bestSize = 0;
+ int bestStart = 0;
+
+ for (int i = 0; i < last - finalMergeSize + 1; i++)
+ {
+ long sumSize = 0;
+ for (int j = 0; j < finalMergeSize; j++)
+ sumSize += Size(infos.Info(j + i));
+ if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize))
+ {
+ bestStart = i;
+ bestSize = sumSize;
+ }
+ }
spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize)));
- }
- }
- }
- else
- spec = null;
- }
- else
- spec = null;
-
- return spec;
- }
-
- /// <summary> Finds merges necessary to expunge all deletes from the
- /// index. We simply merge adjacent segments that have
- /// deletes, up to mergeFactor at a time.
- /// </summary>
- public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
- {
- int numSegments = segmentInfos.Count;
-
- if (Verbose())
- Message("findMergesToExpungeDeletes: " + numSegments + " segments");
-
- MergeSpecification spec = new MergeSpecification();
- int firstSegmentWithDeletions = - 1;
- for (int i = 0; i < numSegments; i++)
- {
- SegmentInfo info = segmentInfos.Info(i);
- int delCount = writer.NumDeletedDocs(info);
- if (delCount > 0)
- {
- if (Verbose())
- Message(" segment " + info.name + " has deletions");
- if (firstSegmentWithDeletions == - 1)
- firstSegmentWithDeletions = i;
- else if (i - firstSegmentWithDeletions == mergeFactor)
- {
- // We've seen mergeFactor segments in a row with
- // deletions, so force a merge now:
- if (Verbose())
- Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
+ }
+ }
+ }
+ else
+ spec = null;
+ }
+ else
+ spec = null;
+
+ return spec;
+ }
+
+ /// <summary> Finds merges necessary to expunge all deletes from the
+ /// index. We simply merge adjacent segments that have
+ /// deletes, up to mergeFactor at a time.
+ /// </summary>
+ public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
+ {
+ int numSegments = segmentInfos.Count;
+
+ if (Verbose())
+ Message("findMergesToExpungeDeletes: " + numSegments + " segments");
+
+ MergeSpecification spec = new MergeSpecification();
+ int firstSegmentWithDeletions = - 1;
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = segmentInfos.Info(i);
+ int delCount = writer.NumDeletedDocs(info);
+ if (delCount > 0)
+ {
+ if (Verbose())
+ Message(" segment " + info.name + " has deletions");
+ if (firstSegmentWithDeletions == - 1)
+ firstSegmentWithDeletions = i;
+ else if (i - firstSegmentWithDeletions == mergeFactor)
+ {
+ // We've seen mergeFactor segments in a row with
+ // deletions, so force a merge now:
+ if (Verbose())
+ Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
- firstSegmentWithDeletions = i;
- }
- }
- else if (firstSegmentWithDeletions != - 1)
- {
- // End of a sequence of segments with deletions, so,
- // merge those past segments even if it's fewer than
- // mergeFactor segments
- if (Verbose())
- Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
+ firstSegmentWithDeletions = i;
+ }
+ }
+ else if (firstSegmentWithDeletions != - 1)
+ {
+ // End of a sequence of segments with deletions, so,
+ // merge those past segments even if it's fewer than
+ // mergeFactor segments
+ if (Verbose())
+ Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive");
spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i)));
- firstSegmentWithDeletions = - 1;
- }
- }
-
- if (firstSegmentWithDeletions != - 1)
- {
- if (Verbose())
- Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
+ firstSegmentWithDeletions = - 1;
+ }
+ }
+
+ if (firstSegmentWithDeletions != - 1)
+ {
+ if (Verbose())
+ Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive");
spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments)));
- }
-
- return spec;
- }
-
- /// <summary>Checks if any merges are now necessary and returns a
- /// <see cref="MergePolicy.MergeSpecification" /> if so. A merge
- /// is necessary when there are more than <see cref="MergeFactor" />
- /// segments at a given level. When
- /// multiple levels have too many segments, this method
- /// will return multiple merges, allowing the <see cref="MergeScheduler" />
- /// to use concurrency.
- /// </summary>
- public override MergeSpecification FindMerges(SegmentInfos infos)
- {
-
- int numSegments = infos.Count;
- if (Verbose())
- Message("findMerges: " + numSegments + " segments");
-
- // Compute levels, which is just log (base mergeFactor)
- // of the size of each segment
- float[] levels = new float[numSegments];
- float norm = (float) System.Math.Log(mergeFactor);
-
- for (int i = 0; i < numSegments; i++)
- {
- SegmentInfo info = infos.Info(i);
- long size = Size(info);
-
- // Floor tiny segments
- if (size < 1)
- size = 1;
- levels[i] = (float) System.Math.Log(size) / norm;
- }
-
- float levelFloor;
- if (minMergeSize <= 0)
- levelFloor = (float) 0.0;
- else
- {
- levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
- }
-
- // Now, we quantize the log values into levels. The
- // first level is any segment whose log size is within
- // LEVEL_LOG_SPAN of the max size, or, who has such as
- // segment "to the right". Then, we find the max of all
- // other segments and use that to define the next level
- // segment, etc.
-
- MergeSpecification spec = null;
-
- int start = 0;
- while (start < numSegments)
- {
-
- // Find max level of all segments not already
- // quantized.
- float maxLevel = levels[start];
- for (int i = 1 + start; i < numSegments; i++)
- {
- float level = levels[i];
- if (level > maxLevel)
- maxLevel = level;
- }
-
- // Now search backwards for the rightmost segment that
- // falls into this level:
- float levelBottom;
- if (maxLevel < levelFloor)
- // All remaining segments fall into the min level
- levelBottom = - 1.0F;
- else
- {
- levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
-
- // Force a boundary at the level floor
- if (levelBottom < levelFloor && maxLevel >= levelFloor)
- levelBottom = levelFloor;
- }
-
- int upto = numSegments - 1;
- while (upto >= start)
- {
- if (levels[upto] >= levelBottom)
- {
- break;
- }
- upto--;
- }
- if (Verbose())
- Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
-
- // Finally, record all merges that are viable at this level:
- int end = start + mergeFactor;
- while (end <= 1 + upto)
- {
- bool anyTooLarge = false;
- for (int i = start; i < end; i++)
- {
- SegmentInfo info = infos.Info(i);
- anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
- }
-
- if (!anyTooLarge)
- {
- if (spec == null)
- spec = new MergeSpecification();
- if (Verbose())
- Message(" " + start + " to " + end + ": add this merge");
+ }
+
+ return spec;
+ }
+
+ /// <summary>Checks if any merges are now necessary and returns a
+ /// <see cref="MergePolicy.MergeSpecification" /> if so. A merge
+ /// is necessary when there are more than <see cref="MergeFactor" />
+ /// segments at a given level. When
+ /// multiple levels have too many segments, this method
+ /// will return multiple merges, allowing the <see cref="MergeScheduler" />
+ /// to use concurrency.
+ /// </summary>
+ public override MergeSpecification FindMerges(SegmentInfos infos)
+ {
+
+ int numSegments = infos.Count;
+ if (Verbose())
+ Message("findMerges: " + numSegments + " segments");
+
+ // Compute levels, which is just log (base mergeFactor)
+ // of the size of each segment
+ float[] levels = new float[numSegments];
+ float norm = (float) System.Math.Log(mergeFactor);
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = infos.Info(i);
+ long size = Size(info);
+
+ // Floor tiny segments
+ if (size < 1)
+ size = 1;
+ levels[i] = (float) System.Math.Log(size) / norm;
+ }
+
+ float levelFloor;
+ if (minMergeSize <= 0)
+ levelFloor = (float) 0.0;
+ else
+ {
+ levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
+ }
+
+ // Now, we quantize the log values into levels. The
+ // first level is any segment whose log size is within
+ // LEVEL_LOG_SPAN of the max size, or, who has such as
+ // segment "to the right". Then, we find the max of all
+ // other segments and use that to define the next level
+ // segment, etc.
+
+ MergeSpecification spec = null;
+
+ int start = 0;
+ while (start < numSegments)
+ {
+
+ // Find max level of all segments not already
+ // quantized.
+ float maxLevel = levels[start];
+ for (int i = 1 + start; i < numSegments; i++)
+ {
+ float level = levels[i];
+ if (level > maxLevel)
+ maxLevel = level;
+ }
+
+ // Now search backwards for the rightmost segment that
+ // falls into this level:
+ float levelBottom;
+ if (maxLevel < levelFloor)
+ // All remaining segments fall into the min level
+ levelBottom = - 1.0F;
+ else
+ {
+ levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
+
+ // Force a boundary at the level floor
+ if (levelBottom < levelFloor && maxLevel >= levelFloor)
+ levelBottom = levelFloor;
+ }
+
+ int upto = numSegments - 1;
+ while (upto >= start)
+ {
+ if (levels[upto] >= levelBottom)
+ {
+ break;
+ }
+ upto--;
+ }
+ if (Verbose())
+ Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
+
+ // Finally, record all merges that are viable at this level:
+ int end = start + mergeFactor;
+ while (end <= 1 + upto)
+ {
+ bool anyTooLarge = false;
+ for (int i = start; i < end; i++)
+ {
+ SegmentInfo info = infos.Info(i);
+ anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs);
+ }
+
+ if (!anyTooLarge)
+ {
+ if (spec == null)
+ spec = new MergeSpecification();
+ if (Verbose())
+ Message(" " + start + " to " + end + ": add this merge");
spec.Add(MakeOneMerge(infos, infos.Range(start, end)));
- }
- else if (Verbose())
- Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
-
- start = end;
- end = start + mergeFactor;
- }
-
- start = 1 + upto;
- }
-
- return spec;
- }
+ }
+ else if (Verbose())
+ Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
+
+ start = end;
+ end = start + mergeFactor;
+ }
+
+ start = 1 + upto;
+ }
+
+ return spec;
+ }
protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge)
{
@@ -553,28 +553,28 @@ namespace Lucene.Net.Index
return new OneMerge(infosToMerge, doCFS);
}
- /// <summary>
- /// Gets or sets the largest segment (measured by document
- /// count) that may be merged with other segments.
- /// <p/>Determines the largest segment (measured by
- /// document count) that may be merged with other segments.
- /// Small values (e.g., less than 10,000) are best for
- /// interactive indexing, as this limits the length of
- /// pauses while indexing to a few seconds. Larger values
- /// are best for batched indexing and speedier
- /// searches.<p/>
- ///
- /// <p/>The default value is <see cref="int.MaxValue" />.<p/>
- ///
- /// <p/>The default merge policy (<see cref="LogByteSizeMergePolicy" />)
- /// also allows you to set this
- /// limit by net size (in MB) of the segment, using
- /// <see cref="LogByteSizeMergePolicy.MaxMergeMB" />.<p/>
- /// </summary>
- public virtual int MaxMergeDocs
- {
- set { this.maxMergeDocs = value; }
- get { return maxMergeDocs; }
- }
- }
+ /// <summary>
+ /// Gets or sets the largest segment (measured by document
+ /// count) that may be merged with other segments.
+ /// <p/>Determines the largest segment (measured by
+ /// document count) that may be merged with other segments.
+ /// Small values (e.g., less than 10,000) are best for
+ /// interactive indexing, as this limits the length of
+ /// pauses while indexing to a few seconds. Larger values
+ /// are best for batched indexing and speedier
+ /// searches.<p/>
+ ///
+ /// <p/>The default value is <see cref="int.MaxValue" />.<p/>
+ ///
+ /// <p/>The default merge policy (<see cref="LogByteSizeMergePolicy" />)
+ /// also allows you to set this
+ /// limit by net size (in MB) of the segment, using
+ /// <see cref="LogByteSizeMergePolicy.MaxMergeMB" />.<p/>
+ /// </summary>
+ public virtual int MaxMergeDocs
+ {
+ set { this.maxMergeDocs = value; }
+ get { return maxMergeDocs; }
+ }
+ }
}
\ No newline at end of file
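
Note on the file above: FindMerges quantizes each segment's size into a level, log base mergeFactor of the size, and schedules a merge whenever mergeFactor segments accumulate whose levels fall within LEVEL_LOG_SPAN (0.75) of the local maximum. A small self-contained sketch of just that level computation, with made-up segment sizes:

    using System;

    static class LevelDemo
    {
        static void Main()
        {
            const int mergeFactor = 10;
            const double levelLogSpan = 0.75;                              // LEVEL_LOG_SPAN
            long[] sizes = { 120000, 95000, 11000, 9000, 1200, 1100, 900 }; // hypothetical sizes

            double norm = Math.Log(mergeFactor);
            double[] levels = new double[sizes.Length];
            for (int i = 0; i < sizes.Length; i++)
                levels[i] = Math.Log(Math.Max(sizes[i], 1)) / norm;        // "floor tiny segments"

            double maxLevel = levels[0];
            for (int i = 1; i < levels.Length; i++)
                if (levels[i] > maxLevel) maxLevel = levels[i];

            double levelBottom = maxLevel - levelLogSpan;
            for (int i = 0; i < sizes.Length; i++)
                Console.WriteLine("size={0,7}  level={1:F2}  topLevel={2}",
                    sizes[i], levels[i], levels[i] >= levelBottom);

            // Segments marked topLevel=True form the current (largest) level;
            // once mergeFactor of them accumulate, FindMerges records one merge.
        }
    }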
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/MergeDocIDRemapper.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/MergeDocIDRemapper.cs b/src/core/Index/MergeDocIDRemapper.cs
index 2771b53..5c06721 100644
--- a/src/core/Index/MergeDocIDRemapper.cs
+++ b/src/core/Index/MergeDocIDRemapper.cs
@@ -20,108 +20,108 @@ using Lucene.Net.Support;
namespace Lucene.Net.Index
{
-
- /// <summary>Remaps docIDs after a merge has completed, where the
- /// merged segments had at least one deletion. This is used
- /// to renumber the buffered deletes in IndexWriter when a
- /// merge of segments with deletions commits.
- /// </summary>
-
- sealed class MergeDocIDRemapper
- {
- internal int[] starts; // used for binary search of mapped docID
- internal int[] newStarts; // starts, minus the deletes
- internal int[][] docMaps; // maps docIDs in the merged set
- internal int minDocID; // minimum docID that needs renumbering
- internal int maxDocID; // 1+ the max docID that needs renumbering
- internal int docShift; // total # deleted docs that were compacted by this merge
-
- public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
- {
- this.docMaps = docMaps;
- SegmentInfo firstSegment = merge.segments.Info(0);
- int i = 0;
- while (true)
- {
- SegmentInfo info = infos.Info(i);
- if (info.Equals(firstSegment))
- break;
- minDocID += info.docCount;
- i++;
- }
-
- int numDocs = 0;
- for (int j = 0; j < docMaps.Length; i++, j++)
- {
- numDocs += infos.Info(i).docCount;
- System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
- }
- maxDocID = minDocID + numDocs;
-
- starts = new int[docMaps.Length];
- newStarts = new int[docMaps.Length];
-
- starts[0] = minDocID;
- newStarts[0] = minDocID;
- for (i = 1; i < docMaps.Length; i++)
- {
- int lastDocCount = merge.segments.Info(i - 1).docCount;
- starts[i] = starts[i - 1] + lastDocCount;
- newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
- }
- docShift = numDocs - mergedDocCount;
-
- // There are rare cases when docShift is 0. It happens
- // if you try to delete a docID that's out of bounds,
- // because the SegmentReader still allocates deletedDocs
- // and pretends it has deletions ... so we can't make
- // this assert here
- // assert docShift > 0;
-
- // Make sure it all adds up:
- System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
- }
-
- public int Remap(int oldDocID)
- {
- if (oldDocID < minDocID)
- // Unaffected by merge
- return oldDocID;
- else if (oldDocID >= maxDocID)
- // This doc was "after" the merge, so simple shift
- return oldDocID - docShift;
- else
- {
- // Binary search to locate this document & find its new docID
- int lo = 0; // search starts array
- int hi = docMaps.Length - 1; // for first element less
-
- while (hi >= lo)
- {
- int mid = Number.URShift((lo + hi), 1);
- int midValue = starts[mid];
- if (oldDocID < midValue)
- hi = mid - 1;
- else if (oldDocID > midValue)
- lo = mid + 1;
- else
- {
- // found a match
- while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue)
- {
- mid++; // scan to last match
- }
- if (docMaps[mid] != null)
- return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]];
- else
- return newStarts[mid] + oldDocID - starts[mid];
- }
- }
- if (docMaps[hi] != null)
- return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]];
- else
- return newStarts[hi] + oldDocID - starts[hi];
- }
- }
- }
+
+ /// <summary>Remaps docIDs after a merge has completed, where the
+ /// merged segments had at least one deletion. This is used
+ /// to renumber the buffered deletes in IndexWriter when a
+ /// merge of segments with deletions commits.
+ /// </summary>
+
+ sealed class MergeDocIDRemapper
+ {
+ internal int[] starts; // used for binary search of mapped docID
+ internal int[] newStarts; // starts, minus the deletes
+ internal int[][] docMaps; // maps docIDs in the merged set
+ internal int minDocID; // minimum docID that needs renumbering
+ internal int maxDocID; // 1+ the max docID that needs renumbering
+ internal int docShift; // total # deleted docs that were compacted by this merge
+
+ public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount)
+ {
+ this.docMaps = docMaps;
+ SegmentInfo firstSegment = merge.segments.Info(0);
+ int i = 0;
+ while (true)
+ {
+ SegmentInfo info = infos.Info(i);
+ if (info.Equals(firstSegment))
+ break;
+ minDocID += info.docCount;
+ i++;
+ }
+
+ int numDocs = 0;
+ for (int j = 0; j < docMaps.Length; i++, j++)
+ {
+ numDocs += infos.Info(i).docCount;
+ System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j)));
+ }
+ maxDocID = minDocID + numDocs;
+
+ starts = new int[docMaps.Length];
+ newStarts = new int[docMaps.Length];
+
+ starts[0] = minDocID;
+ newStarts[0] = minDocID;
+ for (i = 1; i < docMaps.Length; i++)
+ {
+ int lastDocCount = merge.segments.Info(i - 1).docCount;
+ starts[i] = starts[i - 1] + lastDocCount;
+ newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1];
+ }
+ docShift = numDocs - mergedDocCount;
+
+ // There are rare cases when docShift is 0. It happens
+ // if you try to delete a docID that's out of bounds,
+ // because the SegmentReader still allocates deletedDocs
+ // and pretends it has deletions ... so we can't make
+ // this assert here
+ // assert docShift > 0;
+
+ // Make sure it all adds up:
+ System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1]));
+ }
+
+ public int Remap(int oldDocID)
+ {
+ if (oldDocID < minDocID)
+ // Unaffected by merge
+ return oldDocID;
+ else if (oldDocID >= maxDocID)
+ // This doc was "after" the merge, so simple shift
+ return oldDocID - docShift;
+ else
+ {
+ // Binary search to locate this document & find its new docID
+ int lo = 0; // search starts array
+ int hi = docMaps.Length - 1; // for first element less
+
+ while (hi >= lo)
+ {
+ int mid = Number.URShift((lo + hi), 1);
+ int midValue = starts[mid];
+ if (oldDocID < midValue)
+ hi = mid - 1;
+ else if (oldDocID > midValue)
+ lo = mid + 1;
+ else
+ {
+ // found a match
+ while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue)
+ {
+ mid++; // scan to last match
+ }
+ if (docMaps[mid] != null)
+ return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]];
+ else
+ return newStarts[mid] + oldDocID - starts[mid];
+ }
+ }
+ if (docMaps[hi] != null)
+ return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]];
+ else
+ return newStarts[hi] + oldDocID - starts[hi];
+ }
+ }
+ }
}
\ No newline at end of file
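
Note on the file above: Remap is a binary search over starts[] followed by an offset adjustment. DocIDs below minDocID are untouched, docIDs at or above maxDocID simply shift down by docShift, and anything in between is translated through the owning segment's docMap. A tiny arithmetic sketch of that last case, with hypothetical two-segment data (a linear lookup replaces the binary search to keep it short):

    using System;

    static class RemapDemo
    {
        static void Main()
        {
            // Two merged segments of 5 docs each; segment 0 had doc 2 deleted.
            int[] starts    = { 0, 5 };          // old absolute start of each segment
            int[] newStarts = { 0, 4 };          // starts, minus deletions that precede them
            int[][] docMaps =
            {
                new[] { 0, 1, -1, 2, 3 },        // old doc 2 deleted, later docs slide down
                null                             // segment 1 had no deletions
            };

            foreach (int oldDocID in new[] { 1, 4, 7 })
            {
                int seg = oldDocID >= starts[1] ? 1 : 0;   // Remap finds this by binary search
                int newDocID = docMaps[seg] != null
                    ? newStarts[seg] + docMaps[seg][oldDocID - starts[seg]]
                    : newStarts[seg] + oldDocID - starts[seg];
                Console.WriteLine("old {0} -> new {1}", oldDocID, newDocID);
            }
        }
    }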