You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2008/06/25 04:52:24 UTC
svn commit: r671404 [8/10] -
/incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentInfos.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
*/
using System;
+
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
@@ -24,9 +25,9 @@
{
[Serializable]
- public sealed class SegmentInfos : System.Collections.ArrayList
+ sealed public class SegmentInfos : System.Collections.ArrayList
{
- private class AnonymousClassFindSegmentsFile:FindSegmentsFile
+ private class AnonymousClassFindSegmentsFile : FindSegmentsFile
{
private void InitBlock(SegmentInfos enclosingInstance)
{
@@ -41,23 +42,24 @@
}
}
- internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1)
+ internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1) : base(Param1)
{
InitBlock(enclosingInstance);
}
- public override System.Object DoBody(System.String segmentFileName)
+ protected internal override System.Object DoBody(System.String segmentFileName)
{
Enclosing_Instance.Read(directory, segmentFileName);
return null;
}
}
- private class AnonymousClassFindSegmentsFile1:FindSegmentsFile
+
+ private class AnonymousClassFindSegmentsFile1 : FindSegmentsFile
{
- internal AnonymousClassFindSegmentsFile1(Lucene.Net.Store.Directory Param1):base(Param1)
+ internal AnonymousClassFindSegmentsFile1(Lucene.Net.Store.Directory Param1) : base(Param1)
{
}
- public override System.Object DoBody(System.String segmentFileName)
+ protected internal override System.Object DoBody(System.String segmentFileName)
{
IndexInput input = directory.OpenInput(segmentFileName);
@@ -69,8 +71,8 @@
format = input.ReadInt();
if (format < 0)
{
- if (format < Lucene.Net.Index.SegmentInfos.FORMAT_SINGLE_NORM_FILE)
- throw new System.IO.IOException("Unknown format version: " + format);
+ if (format < Lucene.Net.Index.SegmentInfos.CURRENT_FORMAT)
+ throw new CorruptIndexException("Unknown format version: " + format);
version = input.ReadLong(); // read version
}
}
@@ -90,6 +92,7 @@
}
}
+
/// <summary>The file format version, a negative number. </summary>
/* Works since counter, the old 1st entry, is always >= 0 */
public const int FORMAT = - 1;
@@ -105,18 +108,25 @@
/// </summary>
public const int FORMAT_LOCKLESS = - 2;
- /// <summary>This is the current file format written. It adds a
- /// "hasSingleNormFile" flag into each segment info.
+ /// <summary>This format adds a "hasSingleNormFile" flag into each segment info.
/// See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
/// for details.
/// </summary>
public const int FORMAT_SINGLE_NORM_FILE = - 3;
+ /// <summary>This format allows multiple segments to share a single
+ /// vectors and stored fields file.
+ /// </summary>
+ public const int FORMAT_SHARED_DOC_STORE = - 4;
+
+ /* This must always point to the most recent file format. */
+ private static readonly int CURRENT_FORMAT = FORMAT_SHARED_DOC_STORE;
+
public int counter = 0; // used to name new segments
/// <summary> counts how often the index has been changed by adding or deleting docs.
/// starting with the current time in milliseconds forces to create unique version numbers.
/// </summary>
- private long version = System.DateTime.Now.Millisecond;
+ private long version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
private long generation = 0; // generation of the "segments_N" for the next commit
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
@@ -124,9 +134,9 @@
// there was an IOException that had interrupted a commit
/// <summary> If non-null, information about loading segments_N files</summary>
- /// <seealso cref="#setInfoStream.">
+ /// <seealso cref="setInfoStream.">
/// </seealso>
- private static System.IO.TextWriter infoStream;
+ private static System.IO.StreamWriter infoStream;
public SegmentInfo Info(int i)
{
@@ -146,31 +156,15 @@
return - 1;
}
long max = - 1;
- int prefixLen = IndexFileNames.SEGMENTS.Length + 1;
for (int i = 0; i < files.Length; i++)
{
- System.String file = (new System.IO.FileInfo(files[i])).Name;
+ System.String file = files[i];
if (file.StartsWith(IndexFileNames.SEGMENTS) && !file.Equals(IndexFileNames.SEGMENTS_GEN))
{
- if (file.Equals(IndexFileNames.SEGMENTS))
- {
- // Pre lock-less commits:
- if (max == - 1)
- {
- max = 0;
- }
- }
- else
+ long gen = GenerationFromSegmentsFileName(file);
+ if (gen > max)
{
-#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
- long v = Lucene.Net.Documents.NumberTools.ToLong(file.Substring(prefixLen));
-#else
- long v = System.Convert.ToInt64(file.Substring(prefixLen), 16);
-#endif
- if (v > max)
- {
- max = v;
- }
+ max = gen;
}
}
}
@@ -188,7 +182,7 @@
System.String[] files = directory.List();
if (files == null)
{
- throw new System.IO.IOException("Cannot read directory " + directory);
+ throw new System.IO.IOException("cannot read directory " + directory + ": list() returned null");
}
return GetCurrentSegmentGeneration(files);
}
@@ -222,6 +216,26 @@
return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", lastGeneration);
}
+ /// <summary> Parse the generation off the segments file name and
+ /// return it.
+ /// </summary>
+ public static long GenerationFromSegmentsFileName(System.String fileName)
+ {
+ if (fileName.Equals(IndexFileNames.SEGMENTS))
+ {
+ return 0;
+ }
+ else if (fileName.StartsWith(IndexFileNames.SEGMENTS))
+ {
+ return SupportClass.Number.ToInt64(fileName.Substring(1 + IndexFileNames.SEGMENTS.Length));
+ }
+ else
+ {
+ throw new System.ArgumentException("fileName \"" + fileName + "\" is not a segments file");
+ }
+ }
+
+
/// <summary> Get the next segments_N filename that will be written.</summary>
public System.String GetNextSegmentFileName()
{
@@ -246,24 +260,19 @@
/// </param>
/// <param name="segmentFileName">-- segment file to load
/// </param>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
public void Read(Directory directory, System.String segmentFileName)
{
bool success = false;
+ // Clear any previous segments:
+ Clear();
+
IndexInput input = directory.OpenInput(segmentFileName);
- if (segmentFileName.Equals(IndexFileNames.SEGMENTS))
- {
- generation = 0;
- }
- else
- {
-#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
- generation = Lucene.Net.Documents.NumberTools.ToLong(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length));
-#else
- generation = System.Convert.ToInt64(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length), 16);
-#endif
- }
+ generation = GenerationFromSegmentsFileName(segmentFileName);
+
lastGeneration = generation;
try
@@ -273,8 +282,8 @@
{
// file contains explicit format info
// check that it is a format we can understand
- if (format < FORMAT_SINGLE_NORM_FILE)
- throw new System.IO.IOException("Unknown format version: " + format);
+ if (format < CURRENT_FORMAT)
+ throw new CorruptIndexException("Unknown format version: " + format);
version = input.ReadLong(); // read version
counter = input.ReadInt(); // read counter
}
@@ -294,7 +303,7 @@
{
// in old format the version number may be at the end of the file
if (input.GetFilePointer() >= input.Length())
- version = System.DateTime.Now.Millisecond;
+ version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
// old file format without version number
else
version = input.ReadLong(); // read version
@@ -312,15 +321,18 @@
}
}
}
+
/// <summary> This version of read uses the retry logic (for lock-less
/// commits) to find the right segments file to load.
/// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
public void Read(Directory directory)
{
generation = lastGeneration = - 1;
- new AnonymousClassFindSegmentsFile(this, directory).run();
+ new AnonymousClassFindSegmentsFile(this, directory).Run();
}
public void Write(Directory directory)
@@ -340,9 +352,11 @@
IndexOutput output = directory.CreateOutput(segmentFileName);
+ bool success = false;
+
try
{
- output.WriteInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
+ output.WriteInt(CURRENT_FORMAT); // write FORMAT
output.WriteLong(++version); // every write changes
// the index
output.WriteInt(counter); // write counter
@@ -354,7 +368,20 @@
}
finally
{
- output.Close();
+ try
+ {
+ output.Close();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Try not to leave a truncated segments_N file in
+ // the index:
+ directory.DeleteFile(segmentFileName);
+ }
+ }
}
try
@@ -386,38 +413,44 @@
public override System.Object Clone()
{
- SegmentInfos sis = new SegmentInfos();
+ return new SegmentInfos(this);
+ }
- // Copy Fields. const and static fields are ignored
- sis.counter = this.counter;
- sis.version = this.version;
- sis.generation = this.generation;
- sis.lastGeneration = this.lastGeneration;
+ private SegmentInfos(SegmentInfos si) : base(si)
+ {
+ }
- for (int i = 0; i < this.Count; i++)
- {
- sis.Add(((SegmentInfo)this[i]).Clone());
- }
- return sis;
- }
+ public SegmentInfos()
+ {
+ }
/// <summary> version number when this SegmentInfos was generated.</summary>
public long GetVersion()
{
return version;
}
+ public long GetGeneration()
+ {
+ return generation;
+ }
+ public long GetLastGeneration()
+ {
+ return lastGeneration;
+ }
/// <summary> Current version number from segments file.</summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
public static long ReadCurrentVersion(Directory directory)
{
- return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).run());
+ return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).Run());
}
/// <summary>If non-null, information about retries when loading
/// the segments file will be printed to this.
/// </summary>
- public static void SetInfoStream(System.IO.TextWriter infoStream)
+ public static void SetInfoStream(System.IO.StreamWriter infoStream)
{
SegmentInfos.infoStream = infoStream;
}
@@ -438,7 +471,7 @@
defaultGenFileRetryCount = count;
}
- /// <seealso cref="#setDefaultGenFileRetryCount">
+ /// <seealso cref="setDefaultGenFileRetryCount">
/// </seealso>
public static int GetDefaultGenFileRetryCount()
{
@@ -453,7 +486,7 @@
defaultGenFileRetryPauseMsec = msec;
}
- /// <seealso cref="#setDefaultGenFileRetryPauseMsec">
+ /// <seealso cref="setDefaultGenFileRetryPauseMsec">
/// </seealso>
public static int GetDefaultGenFileRetryPauseMsec()
{
@@ -470,16 +503,16 @@
{
defaultGenLookaheadCount = count;
}
- /// <seealso cref="#setDefaultGenLookaheadCount">
+ /// <seealso cref="setDefaultGenLookaheadCount">
/// </seealso>
public static int GetDefaultGenLookahedCount()
{
return defaultGenLookaheadCount;
}
- /// <seealso cref="#setInfoStream">
+ /// <seealso cref="setInfoStream">
/// </seealso>
- public static System.IO.TextWriter GetInfoStream()
+ public static System.IO.StreamWriter GetInfoStream()
{
return infoStream;
}
@@ -488,7 +521,7 @@
{
if (infoStream != null)
{
- infoStream.WriteLine(SupportClass.ThreadClass.Current().Name + ": " + message);
+ infoStream.WriteLine("SIS [" + SupportClass.ThreadClass.Current().Name + "]: " + message);
}
}
@@ -516,7 +549,7 @@
this.directory = directory;
}
- public System.Object run()
+ public System.Object Run()
{
System.String segmentFileName = null;
long lastGen = - 1;
@@ -539,116 +572,131 @@
// it.
// We have three methods for determining the current
- // generation. We try each in sequence.
+ // generation. We try the first two in parallel, and
+ // fall back to the third when necessary.
while (true)
{
- // Method 1: list the directory and use the highest
- // segments_N file. This method works well as long
- // as there is no stale caching on the directory
- // contents:
- System.String[] files = null;
-
if (0 == method)
{
+
+ // Method 1: list the directory and use the highest
+ // segments_N file. This method works well as long
+ // as there is no stale caching on the directory
+ // contents (NOTE: NFS clients often have such stale
+ // caching):
+ System.String[] files = null;
+
+ long genA = - 1;
+
if (directory != null)
- {
files = directory.List();
- }
else
{
files = System.IO.Directory.GetFileSystemEntries(fileDirectory.FullName);
- for (int i = 0; i < files.Length; i++)
- {
- System.IO.FileInfo fi = new System.IO.FileInfo(files[i]);
- files[i] = fi.Name;
- }
- }
-
- gen = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
-
- if (gen == - 1)
- {
- System.String s = "";
- for (int i = 0; i < files.Length; i++)
- {
- s += (" " + files[i]);
- }
- throw new System.IO.FileNotFoundException("no segments* file found: files:" + s);
}
- }
-
- // Method 2 (fallback if Method 1 isn't reliable):
- // if the directory listing seems to be stale, then
- // try loading the "segments.gen" file.
- if (1 == method || (0 == method && lastGen == gen && retry))
- {
- method = 1;
+ if (files != null)
+ genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
- for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
+ Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);
+
+ // Method 2: open segments.gen and read its
+ // contents. Then we take the larger of the two
+ // gen's. This way, if either approach is hitting
+ // a stale cache (NFS) we have a better chance of
+ // getting the right generation.
+ long genB = - 1;
+ if (directory != null)
{
- IndexInput genInput = null;
- try
- {
- genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
- }
- catch (System.IO.IOException e)
+ for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
{
- Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
- }
- if (genInput != null)
- {
-
+ IndexInput genInput = null;
try
{
- int version = genInput.ReadInt();
- if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
+ genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
+ }
+ catch (System.IO.FileNotFoundException e)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
+ break;
+ }
+ catch (System.IO.IOException e)
+ {
+ Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
+ }
+
+ if (genInput != null)
+ {
+ try
{
- long gen0 = genInput.ReadLong();
- long gen1 = genInput.ReadLong();
- Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
- if (gen0 == gen1)
+ int version = genInput.ReadInt();
+ if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
{
- // The file is consistent.
- if (gen0 > gen)
+ long gen0 = genInput.ReadLong();
+ long gen1 = genInput.ReadLong();
+ Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
+ if (gen0 == gen1)
{
- Lucene.Net.Index.SegmentInfos.Message("fallback to '" + IndexFileNames.SEGMENTS_GEN + "' check: now try generation " + gen0 + " > " + gen);
- gen = gen0;
+ // The file is consistent.
+ genB = gen0;
+ break;
}
- break;
}
}
+ catch (System.IO.IOException err2)
+ {
+ // will retry
+ }
+ finally
+ {
+ genInput.Close();
+ }
}
- catch (System.IO.IOException err2)
+ try
{
- // will retry
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
}
- finally
+ catch (System.Threading.ThreadInterruptedException e)
{
- genInput.Close();
+ // will retry
}
}
- try
- {
- System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
- }
- catch (System.Threading.ThreadInterruptedException e)
+ }
+
+ Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);
+
+ // Pick the larger of the two gen's:
+ if (genA > genB)
+ gen = genA;
+ else
+ gen = genB;
+
+ if (gen == - 1)
+ {
+ // Neither approach found a generation
+ System.String s;
+ if (files != null)
{
- // will retry
+ s = "";
+ for (int i = 0; i < files.Length; i++)
+ s += (" " + files[i]);
}
+ else
+ s = " null";
+ throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + s);
}
}
- // Method 3 (fallback if Methods 2 & 3 are not
- // reliable): since both directory cache and file
- // contents cache seem to be stale, just advance the
- // generation.
- if (2 == method || (1 == method && lastGen == gen && retry))
+ // Third method (fallback if first & second methods
+ // are not reliable): since both directory cache and
+ // file contents cache seem to be stale, just
+ // advance the generation.
+ if (1 == method || (0 == method && lastGen == gen && retry))
{
- method = 2;
+ method = 1;
if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
{
@@ -720,7 +768,20 @@
// try it if so:
System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);
- if (directory.FileExists(prevSegmentFileName))
+ bool prevExists;
+ if (directory != null)
+ prevExists = directory.FileExists(prevSegmentFileName);
+ else
+ {
+ bool tmpBool;
+ if (System.IO.File.Exists(new System.IO.FileInfo(fileDirectory.FullName + "\\" + prevSegmentFileName).FullName))
+ tmpBool = true;
+ else
+ tmpBool = System.IO.Directory.Exists(new System.IO.FileInfo(fileDirectory.FullName + "\\" + prevSegmentFileName).FullName);
+ prevExists = tmpBool;
+ }
+
+ if (prevExists)
{
Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
try
@@ -747,7 +808,19 @@
/// during the processing that could have been caused by
/// a writer committing.
/// </summary>
- public abstract System.Object DoBody(System.String segmentFileName);
+ protected internal abstract System.Object DoBody(System.String segmentFileName);
+ }
+
+ /// <summary> Returns a new SegmentInfos containing the SegmentInfo
+ /// instances in the specified range first (inclusive) to
+ /// last (exclusive), so total number of segments returned
+ /// is last-first.
+ /// </summary>
+ public SegmentInfos Range(int first, int last)
+ {
+ SegmentInfos infos = new SegmentInfos();
+ infos.AddRange((System.Collections.IList) ((System.Collections.ArrayList) this).GetRange(first, last - first));
+ return infos;
}
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMergeQueue.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
*/
using System;
+
using PriorityQueue = Lucene.Net.Util.PriorityQueue;
namespace Lucene.Net.Index
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMerger.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs Tue Jun 24 19:52:22 2008
@@ -16,11 +16,12 @@
*/
using System;
+
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
-using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
namespace Lucene.Net.Index
{
@@ -33,12 +34,13 @@
///
///
/// </summary>
- /// <seealso cref="#merge">
+ /// <seealso cref="merge">
/// </seealso>
- /// <seealso cref="#add">
+ /// <seealso cref="add">
/// </seealso>
- public sealed class SegmentMerger
+ sealed class SegmentMerger
{
+ [Serializable]
private class AnonymousClassFieldSelector : FieldSelector
{
public AnonymousClassFieldSelector(SegmentMerger enclosingInstance)
@@ -69,7 +71,7 @@
}
/// <summary>norms header placeholder </summary>
- internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', (byte) 255};
+ internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) -1)};
private Directory directory;
private System.String segment;
@@ -78,6 +80,21 @@
private System.Collections.ArrayList readers = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
private FieldInfos fieldInfos;
+ private int mergedDocs;
+
+ private CheckAbort checkAbort;
+
+ // Whether we should merge doc stores (stored fields and
+ // vectors files). When all segments we are merging
+ // already share the same doc store files, we don't need
+ // to merge the doc stores.
+ private bool mergeDocStores;
+
+ /// <summary>Maximum number of contiguous documents to bulk-copy
+ /// when merging stored fields
+ /// </summary>
+ private const int MAX_RAW_MERGE_DOCS = 4192;
+
/// <summary>This ctor used only by test code.
///
/// </summary>
@@ -85,25 +102,27 @@
/// </param>
/// <param name="name">The name of the new segment
/// </param>
- public SegmentMerger(Directory dir, System.String name)
+ internal SegmentMerger(Directory dir, System.String name)
{
InitBlock();
directory = dir;
segment = name;
}
- internal SegmentMerger(IndexWriter writer, System.String name)
+ internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
{
InitBlock();
directory = writer.GetDirectory();
segment = name;
+ if (merge != null)
+ checkAbort = new CheckAbort(merge, directory);
termIndexInterval = writer.GetTermIndexInterval();
}
/// <summary> Add an IndexReader to the collection of readers that are to be merged</summary>
- /// <param name="">reader
+ /// <param name="reader">
/// </param>
- public void Add(IndexReader reader)
+ internal void Add(IndexReader reader)
{
readers.Add(reader);
}
@@ -121,26 +140,50 @@
/// <summary> Merges the readers specified by the {@link #add} method into the directory passed to the constructor</summary>
/// <returns> The number of documents that were merged
/// </returns>
- /// <throws> IOException </throws>
- public int Merge()
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ internal int Merge()
+ {
+ return Merge(true);
+ }
+
+ /// <summary> Merges the readers specified by the {@link #add} method
+ /// into the directory passed to the constructor.
+ /// </summary>
+ /// <param name="mergeDocStores">if false, we will not merge the
+ /// stored fields nor vectors files
+ /// </param>
+ /// <returns> The number of documents that were merged
+ /// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ internal int Merge(bool mergeDocStores)
{
- int value_Renamed;
- value_Renamed = MergeFields();
+ this.mergeDocStores = mergeDocStores;
+
+ // NOTE: it's important to add calls to
+ // checkAbort.work(...) if you make any changes to this
+ // method that will spend alot of time. The frequency
+ // of this check impacts how long
+ // IndexWriter.close(false) takes to actually stop the
+ // threads.
+
+ mergedDocs = MergeFields();
MergeTerms();
MergeNorms();
- if (fieldInfos.HasVectors())
+ if (mergeDocStores && fieldInfos.HasVectors())
MergeVectors();
- return value_Renamed;
+ return mergedDocs;
}
/// <summary> close all IndexReaders that have been added.
/// Should not be called before merge().
/// </summary>
/// <throws> IOException </throws>
- public void CloseReaders()
+ internal void CloseReaders()
{
for (int i = 0; i < readers.Count; i++)
{
@@ -150,16 +193,18 @@
}
}
- public System.Collections.ArrayList CreateCompoundFile(System.String fileName)
+ internal System.Collections.ArrayList CreateCompoundFile(System.String fileName)
{
- CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName);
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.Length + 1));
// Basic files
for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
{
- files.Add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
+ System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
+ if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
+ files.Add(segment + "." + ext);
}
// Fieldable norm files
@@ -174,7 +219,7 @@
}
// Vector files
- if (fieldInfos.HasVectors())
+ if (fieldInfos.HasVectors() && mergeDocStores)
{
for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++)
{
@@ -195,62 +240,169 @@
return files;
}
- private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+ private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads)
{
System.Collections.IEnumerator i = names.GetEnumerator();
while (i.MoveNext())
{
- System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current;
- System.String field = (System.String) e.Key;
- fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field));
+ System.String field = (System.String) i.Current;
+ fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads);
}
}
/// <summary> </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
- /// <throws> IOException </throws>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
private int MergeFields()
{
- fieldInfos = new FieldInfos(); // merge field names
- int docCount = 0;
+
+ if (!mergeDocStores)
+ {
+ // When we are not merging by doc stores, that means
+ // all segments were written as part of a single
+ // autoCommit=false IndexWriter session, so their field
+ // name -> number mapping are the same. So, we start
+ // with the fieldInfos of the last segment in this
+ // case, to keep that numbering.
+ SegmentReader sr = (SegmentReader) readers[readers.Count - 1];
+ fieldInfos = (FieldInfos) sr.fieldInfos.Clone();
+ }
+ else
+ {
+ fieldInfos = new FieldInfos(); // merge field names
+ }
+
for (int i = 0; i < readers.Count; i++)
{
IndexReader reader = (IndexReader) readers[i];
- AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
- AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
- AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
- AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
- AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
- fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
+ if (reader is SegmentReader)
+ {
+ SegmentReader segmentReader = (SegmentReader) reader;
+ for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
+ {
+ FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
+ fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
+ }
+ }
+ else
+ {
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
+ AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
+ fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
+ }
}
fieldInfos.Write(directory, segment + ".fnm");
- FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
-
- // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
- // in merge mode, we use this FieldSelector
- FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
+ int docCount = 0;
- try
+ if (mergeDocStores)
{
+
+ // If the i'th reader is a SegmentReader and has
+ // identical fieldName -> number mapping, then this
+ // array will be non-null at position i:
+ SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];
+
+ // If this reader is a SegmentReader, and all of its
+ // field name -> number mappings match the "merged"
+ // FieldInfos, then we can do a bulk copy of the
+ // stored fields:
for (int i = 0; i < readers.Count; i++)
{
IndexReader reader = (IndexReader) readers[i];
- int maxDoc = reader.MaxDoc();
- for (int j = 0; j < maxDoc; j++)
- if (!reader.IsDeleted(j))
+ if (reader is SegmentReader)
+ {
+ SegmentReader segmentReader = (SegmentReader) reader;
+ bool same = true;
+ FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos();
+ for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
+ same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
+ if (same)
{
- // skip deleted docs
- fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
- docCount++;
+ matchingSegmentReaders[i] = segmentReader;
}
+ }
+ }
+
+ // Used for bulk-reading raw bytes for stored fields
+ int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
+
+ // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
+ // in merge mode, we use this FieldSelector
+ FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
+
+ // merge field values
+ FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
+
+ try
+ {
+ for (int i = 0; i < readers.Count; i++)
+ {
+ IndexReader reader = (IndexReader) readers[i];
+ SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
+ FieldsReader matchingFieldsReader;
+ if (matchingSegmentReader != null)
+ matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
+ else
+ matchingFieldsReader = null;
+ int maxDoc = reader.MaxDoc();
+ for (int j = 0; j < maxDoc; )
+ {
+ if (!reader.IsDeleted(j))
+ {
+ // skip deleted docs
+ if (matchingSegmentReader != null)
+ {
+ // We can optimize this case (doing a bulk
+ // byte copy) since the field numbers are
+ // identical
+ int start = j;
+ int numDocs = 0;
+ do
+ {
+ j++;
+ numDocs++;
+ }
+ while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);
+
+ IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
+ fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
+ docCount += numDocs;
+ if (checkAbort != null)
+ checkAbort.Work(300 * numDocs);
+ }
+ else
+ {
+ fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
+ j++;
+ docCount++;
+ if (checkAbort != null)
+ checkAbort.Work(300);
+ }
+ }
+ else
+ j++;
+ }
+ }
+ }
+ finally
+ {
+ fieldsWriter.Close();
}
}
- finally
- {
- fieldsWriter.Close();
- }
+ // If we are skipping the doc stores, that means there
+ // are no deletions in any of these segments, so we
+ // just sum numDocs() of each segment to get total docCount
+ else
+ for (int i = 0; i < readers.Count; i++)
+ docCount += ((IndexReader) readers[i]).NumDocs();
+
return docCount;
}
@@ -272,6 +424,8 @@
if (reader.IsDeleted(docNum))
continue;
termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
+ if (checkAbort != null)
+ checkAbort.Work(300);
}
}
}
@@ -285,7 +439,9 @@
private IndexOutput proxOutput = null;
private TermInfosWriter termInfosWriter = null;
private int skipInterval;
+ private int maxSkipLevels;
private SegmentMergeQueue queue = null;
+ private DefaultSkipListWriter skipListWriter = null;
private void MergeTerms()
{
@@ -295,6 +451,8 @@
proxOutput = directory.CreateOutput(segment + ".prx");
termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
skipInterval = termInfosWriter.skipInterval;
+ maxSkipLevels = termInfosWriter.maxSkipLevels;
+ skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
queue = new SegmentMergeQueue(readers.Count);
MergeTermInfos();
@@ -343,7 +501,10 @@
top = (SegmentMergeInfo) queue.Top();
}
- MergeTermInfo(match, matchSize); // add new TermInfo
+ int df = MergeTermInfo(match, matchSize); // add new TermInfo
+
+ if (checkAbort != null)
+ checkAbort.Work(df / 3.0);
while (matchSize > 0)
{
@@ -368,14 +529,16 @@
/// </param>
/// <param name="n">number of cells in the array actually occupied
/// </param>
- private void MergeTermInfo(SegmentMergeInfo[] smis, int n)
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ private int MergeTermInfo(SegmentMergeInfo[] smis, int n)
{
long freqPointer = freqOutput.GetFilePointer();
long proxPointer = proxOutput.GetFilePointer();
int df = AppendPostings(smis, n); // append posting data
- long skipPointer = WriteSkip();
+ long skipPointer = skipListWriter.WriteSkip(freqOutput);
if (df > 0)
{
@@ -383,8 +546,12 @@
termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
termInfosWriter.Add(smis[0].term, termInfo);
}
+
+ return df;
}
+ private byte[] payloadBuffer = null;
+
/// <summary>Process postings from multiple segments all positioned on the
/// same term. Writes out merged entries into freqOutput and
/// the proxOutput streams.
@@ -396,15 +563,20 @@
/// </param>
/// <returns> number of documents across all segments where this term was found
/// </returns>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
private int AppendPostings(SegmentMergeInfo[] smis, int n)
{
int lastDoc = 0;
int df = 0; // number of docs w/ term
- ResetSkip();
+ skipListWriter.ResetSkip();
+ bool storePayloads = fieldInfos.FieldInfo(smis[0].term.field).storePayloads;
+ int lastPayloadLength = - 1; // ensures that we write the first length
for (int i = 0; i < n; i++)
{
SegmentMergeInfo smi = smis[i];
TermPositions postings = smi.GetPositions();
+ System.Diagnostics.Debug.Assert(postings != null);
int base_Renamed = smi.base_Renamed;
int[] docMap = smi.GetDocMap();
postings.Seek(smi.termEnum);
@@ -416,13 +588,14 @@
doc += base_Renamed; // convert to merged space
if (doc < 0 || (df > 0 && doc <= lastDoc))
- throw new System.SystemException("docs out of order (" + doc + " <= " + lastDoc + " )");
+ throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )");
df++;
if ((df % skipInterval) == 0)
{
- BufferSkip(lastDoc);
+ skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength);
+ skipListWriter.BufferSkip(df);
}
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
@@ -439,11 +612,41 @@
freqOutput.WriteVInt(freq); // write frequency in doc
}
+ /** See {@link DocumentWriter#writePostings(Posting[], String)} for
+ * documentation about the encoding of positions and payloads
+ */
int lastPosition = 0; // write position deltas
for (int j = 0; j < freq; j++)
{
int position = postings.NextPosition();
- proxOutput.WriteVInt(position - lastPosition);
+ int delta = position - lastPosition;
+ if (storePayloads)
+ {
+ int payloadLength = postings.GetPayloadLength();
+ if (payloadLength == lastPayloadLength)
+ {
+ proxOutput.WriteVInt(delta * 2);
+ }
+ else
+ {
+ proxOutput.WriteVInt(delta * 2 + 1);
+ proxOutput.WriteVInt(payloadLength);
+ lastPayloadLength = payloadLength;
+ }
+ if (payloadLength > 0)
+ {
+ if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
+ {
+ payloadBuffer = new byte[payloadLength];
+ }
+ postings.GetPayload(payloadBuffer, 0);
+ proxOutput.WriteBytes(payloadBuffer, 0, payloadLength);
+ }
+ }
+ else
+ {
+ proxOutput.WriteVInt(delta);
+ }
lastPosition = position;
}
}
@@ -451,40 +654,6 @@
return df;
}
- private RAMOutputStream skipBuffer = new RAMOutputStream();
- private int lastSkipDoc;
- private long lastSkipFreqPointer;
- private long lastSkipProxPointer;
-
- private void ResetSkip()
- {
- skipBuffer.Reset();
- lastSkipDoc = 0;
- lastSkipFreqPointer = freqOutput.GetFilePointer();
- lastSkipProxPointer = proxOutput.GetFilePointer();
- }
-
- private void BufferSkip(int doc)
- {
- long freqPointer = freqOutput.GetFilePointer();
- long proxPointer = proxOutput.GetFilePointer();
-
- skipBuffer.WriteVInt(doc - lastSkipDoc);
- skipBuffer.WriteVInt((int) (freqPointer - lastSkipFreqPointer));
- skipBuffer.WriteVInt((int) (proxPointer - lastSkipProxPointer));
-
- lastSkipDoc = doc;
- lastSkipFreqPointer = freqPointer;
- lastSkipProxPointer = proxPointer;
- }
-
- private long WriteSkip()
- {
- long skipPointer = freqOutput.GetFilePointer();
- skipBuffer.WriteTo(freqOutput);
- return skipPointer;
- }
-
private void MergeNorms()
{
byte[] normBuffer = null;
@@ -528,6 +697,8 @@
}
}
}
+ if (checkAbort != null)
+ checkAbort.Work(maxDoc);
}
}
}
@@ -540,5 +711,34 @@
}
}
}
+
+ internal sealed class CheckAbort
+ {
+ private double workCount;
+ private MergePolicy.OneMerge merge;
+ private Directory dir;
+ public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
+ {
+ this.merge = merge;
+ this.dir = dir;
+ }
+
+ /// <summary> Records the fact that roughly <c>units</c> amount of work
+ /// have been done since this method was last called.
+ /// When adding time-consuming code into SegmentMerger,
+ /// you should test different values for units to ensure
+ /// that the time in between calls to merge.checkAborted
+ /// is up to ~ 1 second.
+ /// </summary>
+ public void Work(double units)
+ {
+ workCount += units;
+ if (workCount >= 10000.0)
+ {
+ merge.CheckAborted(dir);
+ workCount = 0;
+ }
+ }
+ }
}
}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentReader.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,755 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using BitVector = Lucene.Net.Util.BitVector;
+
+namespace Lucene.Net.Index
+{
+
+ /// <version> $Id: SegmentReader.java 496851 2007-01-16 20:24:52Z mikemccand $
+ /// </version>
+ public class SegmentReader : IndexReader
+ {
+ private System.String segment;
+ private SegmentInfo si;
+
+ internal FieldInfos fieldInfos;
+ private FieldsReader fieldsReader;
+
+ internal TermInfosReader tis;
+ internal TermVectorsReader termVectorsReaderOrig = null;
+ internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
+
+ internal BitVector deletedDocs = null;
+ private bool deletedDocsDirty = false;
+ private bool normsDirty = false;
+ private bool undeleteAll = false;
+
+ private bool rollbackDeletedDocsDirty = false;
+ private bool rollbackNormsDirty = false;
+ private bool rollbackUndeleteAll = false;
+
+ internal IndexInput freqStream;
+ internal IndexInput proxStream;
+
+ // Compound File Reader when based on a compound file segment
+ internal CompoundFileReader cfsReader = null;
+
+ public FieldInfos FieldInfos
+ {
+ get { return fieldInfos; }
+ }
+
+ public IndexInput ProxStream
+ {
+ get { return proxStream; }
+ set { proxStream = value; }
+ }
+
+ private class Norm
+ {
+ private void InitBlock(SegmentReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private SegmentReader enclosingInstance;
+ public SegmentReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
+ {
+ InitBlock(enclosingInstance);
+ this.in_Renamed = in_Renamed;
+ this.number = number;
+ this.normSeek = normSeek;
+ }
+
+ internal IndexInput in_Renamed;
+ internal byte[] bytes;
+ internal bool dirty;
+ internal int number;
+ internal long normSeek;
+ internal bool rollbackDirty;
+
+ internal void ReWrite(SegmentInfo si)
+ {
+ // NOTE: norms are re-written in regular directory, not cfs
+
+ System.String oldFileName = si.GetNormFileName(this.number);
+ if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
+ {
+ // Mark this file for deletion. Note that we don't
+ // actually try to delete it until the new segments files is
+ // successfully written:
+ Enclosing_Instance.deleter.AddPendingFile(oldFileName);
+ }
+
+ si.AdvanceNormGen(this.number);
+ IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number));
+ try
+ {
+ out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
+ }
+ finally
+ {
+ out_Renamed.Close();
+ }
+ this.dirty = false;
+ }
+ }
+
+ private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
+
+ /// <summary>The class which implements SegmentReader. </summary>
+ private static System.Type IMPL;
+
+ public SegmentReader() : base(null)
+ {
+ }
+
+ public static SegmentReader Get(SegmentInfo si)
+ {
+ return Get(si.dir, si, null, false, false);
+ }
+
+ public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
+ {
+ return Get(si.dir, si, sis, closeDir, true);
+ }
+
+ public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
+ {
+ SegmentReader instance;
+ try
+ {
+ instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("cannot load SegmentReader class: " + e, e);
+ }
+ instance.Init(dir, sis, closeDir, ownDir);
+ instance.Initialize(si);
+ return instance;
+ }
+
+ private void Initialize(SegmentInfo si)
+ {
+ segment = si.name;
+ this.si = si;
+
+ bool success = false;
+
+ try
+ {
+ // Use compound file directory for some files, if it exists
+ Directory cfsDir = Directory();
+ if (si.GetUseCompoundFile())
+ {
+ cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
+ cfsDir = cfsReader;
+ }
+
+ // No compound file exists - use the multi-file format
+ fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
+ fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+
+ // Verify two sources of "maxDoc" agree:
+ if (fieldsReader.Size() != si.docCount)
+ {
+ throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
+ }
+
+ tis = new TermInfosReader(cfsDir, segment, fieldInfos);
+
+ // NOTE: the bitvector is stored using the regular directory, not cfs
+ if (HasDeletions(si))
+ {
+ deletedDocs = new BitVector(Directory(), si.GetDelFileName());
+
+ // Verify # deletes does not exceed maxDoc for this segment:
+ if (deletedDocs.Count() > MaxDoc())
+ {
+ throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
+ }
+ }
+
+ // make sure that all index files have been read or are kept open
+ // so that if an index update removes them we'll still have them
+ freqStream = cfsDir.OpenInput(segment + ".frq");
+ proxStream = cfsDir.OpenInput(segment + ".prx");
+ OpenNorms(cfsDir);
+
+ if (fieldInfos.HasVectors())
+ {
+ // open term vector files only as needed
+ termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
+ }
+ success = true;
+ }
+ finally
+ {
+
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success)
+ {
+ DoClose();
+ }
+ }
+ }
+
+ protected internal override void DoCommit()
+ {
+ if (deletedDocsDirty)
+ {
+ // re-write deleted
+ System.String oldDelFileName = si.GetDelFileName();
+ if (oldDelFileName != null)
+ {
+ // Mark this file for deletion. Note that we don't
+ // actually try to delete it until the new segments files is
+ // successfully written:
+ deleter.AddPendingFile(oldDelFileName);
+ }
+
+ si.AdvanceDelGen();
+
+ // We can write directly to the actual name (vs to a
+ // .tmp & renaming it) because the file is not live
+ // until segments file is written:
+ deletedDocs.Write(Directory(), si.GetDelFileName());
+ }
+ if (undeleteAll && si.HasDeletions())
+ {
+ System.String oldDelFileName = si.GetDelFileName();
+ if (oldDelFileName != null)
+ {
+ // Mark this file for deletion. Note that we don't
+ // actually try to delete it until the new segments files is
+ // successfully written:
+ deleter.AddPendingFile(oldDelFileName);
+ }
+ si.ClearDelGen();
+ }
+ if (normsDirty)
+ {
+ // re-write norms
+ si.SetNumFields(fieldInfos.Size());
+ System.Collections.IEnumerator values = norms.Values.GetEnumerator();
+ while (values.MoveNext())
+ {
+ Norm norm = (Norm) values.Current;
+ if (norm.dirty)
+ {
+ norm.ReWrite(si);
+ }
+ }
+ }
+ deletedDocsDirty = false;
+ normsDirty = false;
+ undeleteAll = false;
+ }
+
+ protected internal override void DoClose()
+ {
+ if (fieldsReader != null)
+ {
+ fieldsReader.Close();
+ }
+ if (tis != null)
+ {
+ tis.Close();
+ }
+
+ if (freqStream != null)
+ freqStream.Close();
+ if (proxStream != null)
+ proxStream.Close();
+
+ CloseNorms();
+
+ if (termVectorsReaderOrig != null)
+ termVectorsReaderOrig.Close();
+
+ if (cfsReader != null)
+ cfsReader.Close();
+ }
+
+ internal static bool HasDeletions(SegmentInfo si)
+ {
+ return si.HasDeletions();
+ }
+
+ public override bool HasDeletions()
+ {
+ return deletedDocs != null;
+ }
+
+ internal static bool UsesCompoundFile(SegmentInfo si)
+ {
+ return si.GetUseCompoundFile();
+ }
+
+ internal static bool HasSeparateNorms(SegmentInfo si)
+ {
+ return si.HasSeparateNorms();
+ }
+
+ protected internal override void DoDelete(int docNum)
+ {
+ if (deletedDocs == null)
+ deletedDocs = new BitVector(MaxDoc());
+ deletedDocsDirty = true;
+ undeleteAll = false;
+ deletedDocs.Set(docNum);
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ deletedDocs = null;
+ deletedDocsDirty = false;
+ undeleteAll = true;
+ }
+
+ internal virtual System.Collections.ArrayList Files()
+ {
+ System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
+
+ if (si.GetUseCompoundFile())
+ {
+ System.String name = segment + ".cfs";
+ if (Directory().FileExists(name))
+ {
+ files.Add(name);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
+ {
+ System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
+ if (Directory().FileExists(name))
+ files.Add(name);
+ }
+ }
+
+ if (si.HasDeletions())
+ {
+ files.Add(si.GetDelFileName());
+ }
+
+ bool addedNrm = false;
+ for (int i = 0; i < fieldInfos.Size(); i++)
+ {
+ System.String name = si.GetNormFileName(i);
+ if (name != null && Directory().FileExists(name))
+ {
+ if (name.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
+ {
+ if (addedNrm)
+ continue; // add .nrm just once
+ addedNrm = true;
+ }
+ files.Add(name);
+ }
+ }
+ return files;
+ }
+
+ public override TermEnum Terms()
+ {
+ return tis.Terms();
+ }
+
+ public override TermEnum Terms(Term t)
+ {
+ return tis.Terms(t);
+ }
+
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ lock (this)
+ {
+ if (IsDeleted(n))
+ throw new System.ArgumentException("attempt to access a deleted document");
+ return fieldsReader.Doc(n, fieldSelector);
+ }
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ lock (this)
+ {
+ return (deletedDocs != null && deletedDocs.Get(n));
+ }
+ }
+
+ public override TermDocs TermDocs()
+ {
+ return new SegmentTermDocs(this);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ return new SegmentTermPositions(this);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ TermInfo ti = tis.Get(t);
+ if (ti != null)
+ return ti.docFreq;
+ else
+ return 0;
+ }
+
+ public override int NumDocs()
+ {
+ int n = MaxDoc();
+ if (deletedDocs != null)
+ n -= deletedDocs.Count();
+ return n;
+ }
+
+ public override int MaxDoc()
+ {
+ return si.docCount;
+ }
+
+ /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption)">
+ /// </seealso>
+ public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
+ {
+
+ System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
+ for (int i = 0; i < fieldInfos.Size(); i++)
+ {
+ FieldInfo fi = fieldInfos.FieldInfo(i);
+ if (fieldOption == IndexReader.FieldOption.ALL)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ }
+ return fieldSet;
+ }
+
+
+ public override bool HasNorms(System.String field)
+ {
+ lock (this)
+ {
+ return norms.ContainsKey(field);
+ }
+ }
+
+ internal static byte[] CreateFakeNorms(int size)
+ {
+ byte[] ones = new byte[size];
+ byte val = DefaultSimilarity.EncodeNorm(1.0f);
+ for (int index = 0; index < size; index++)
+ ones[index] = val;
+ return ones;
+ }
+
+ private byte[] ones;
+ private byte[] FakeNorms()
+ {
+ if (ones == null)
+ ones = CreateFakeNorms(MaxDoc());
+ return ones;
+ }
+
+ // can return null if norms aren't stored
+ protected internal virtual byte[] GetNorms(System.String field)
+ {
+ lock (this)
+ {
+ Norm norm = (Norm) norms[field];
+ if (norm == null)
+ return null; // not indexed, or norms not stored
+ if (norm.bytes == null)
+ {
+ // value not yet read
+ byte[] bytes = new byte[MaxDoc()];
+ Norms(field, bytes, 0);
+ norm.bytes = bytes; // cache it
+ }
+ return norm.bytes;
+ }
+ }
+
+ // returns fake norms if norms aren't available
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ byte[] bytes = GetNorms(field);
+ if (bytes == null)
+ bytes = FakeNorms();
+ return bytes;
+ }
+ }
+
+ protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
+ {
+ Norm norm = (Norm) norms[field];
+ if (norm == null)
+ // not an indexed field
+ return ;
+ norm.dirty = true; // mark it dirty
+ normsDirty = true;
+
+ Norms(field)[doc] = value_Renamed; // set the value
+ }
+
+ /// <summary>Read norms into a pre-allocated array. </summary>
+ public override void Norms(System.String field, byte[] bytes, int offset)
+ {
+ lock (this)
+ {
+
+ Norm norm = (Norm) norms[field];
+ if (norm == null)
+ {
+ Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
+ return ;
+ }
+
+ if (norm.bytes != null)
+ {
+ // can copy from cache
+ Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
+ return ;
+ }
+
+ IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
+ try
+ {
+ // read from disk
+ normStream.Seek(norm.normSeek);
+ normStream.ReadBytes(bytes, offset, MaxDoc());
+ }
+ finally
+ {
+ normStream.Close();
+ }
+ }
+ }
+
+
+ private void OpenNorms(Directory cfsDir)
+ {
+ long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now)
+ int maxDoc = MaxDoc();
+ for (int i = 0; i < fieldInfos.Size(); i++)
+ {
+ FieldInfo fi = fieldInfos.FieldInfo(i);
+ if (fi.isIndexed && !fi.omitNorms)
+ {
+ Directory d = Directory();
+ System.String fileName = si.GetNormFileName(fi.number);
+ if (!si.HasSeparateNorms(fi.number))
+ {
+ d = cfsDir;
+ }
+ long normSeek = (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)?nextNormSeek:0);
+ norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number, normSeek);
+ nextNormSeek += maxDoc; // increment also if some norms are separate
+ }
+ }
+ }
+
+ private void CloseNorms()
+ {
+ lock (norms.SyncRoot)
+ {
+ System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator();
+ while (enumerator.MoveNext())
+ {
+ Norm norm = (Norm) enumerator.Current;
+ norm.in_Renamed.Close();
+ }
+ }
+ }
+
+ /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
+ /// <returns> TermVectorsReader
+ /// </returns>
+ private TermVectorsReader GetTermVectorsReader()
+ {
+ TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal);
+ if (tvReader == null)
+ {
+ tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
+ System.Threading.Thread.SetData(termVectorsLocal, tvReader);
+ }
+ return tvReader;
+ }
+
+ /// <summary>Return a term frequency vector for the specified document and field. The
+ /// vector returned contains term numbers and frequencies for all terms in
+ /// the specified field of this document, if the field had storeTermVector
+ /// flag set. If the flag was not set, the method returns null.
+ /// </summary>
+ /// <throws> IOException </throws>
+ public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
+ {
+ // Check if this field is invalid or has no stored term vector
+ FieldInfo fi = fieldInfos.FieldInfo(field);
+ if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
+ return null;
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
+ return null;
+
+ return termVectorsReader.Get(docNumber, field);
+ }
+
+
+ /// <summary>Return an array of term frequency vectors for the specified document.
+ /// The array contains a vector for each vectorized field in the document.
+ /// Each vector contains term numbers and frequencies for all terms
+ /// in a given vectorized field.
+ /// If no such fields existed, the method returns null.
+ /// </summary>
+ /// <throws> IOException </throws>
+ public override TermFreqVector[] GetTermFreqVectors(int docNumber)
+ {
+ if (termVectorsReaderOrig == null)
+ return null;
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
+ return null;
+
+ return termVectorsReader.Get(docNumber);
+ }
+
+ /// <summary> Return the name of the segment this reader is reading.</summary>
+ internal virtual System.String GetSegmentName()
+ {
+ return segment;
+ }
+
+ internal virtual void SetSegmentInfo(SegmentInfo info)
+ {
+ si = info;
+ }
+
+ internal override void StartCommit()
+ {
+ base.StartCommit();
+ rollbackDeletedDocsDirty = deletedDocsDirty;
+ rollbackNormsDirty = normsDirty;
+ rollbackUndeleteAll = undeleteAll;
+ System.Collections.IEnumerator values = norms.Values.GetEnumerator();
+ while (values.MoveNext())
+ {
+ Norm norm = (Norm) values.Current;
+ norm.rollbackDirty = norm.dirty;
+ }
+ }
+
+ internal override void RollbackCommit()
+ {
+ base.RollbackCommit();
+ deletedDocsDirty = rollbackDeletedDocsDirty;
+ normsDirty = rollbackNormsDirty;
+ undeleteAll = rollbackUndeleteAll;
+ System.Collections.IEnumerator values = norms.Values.GetEnumerator();
+ while (values.MoveNext())
+ {
+ Norm norm = (Norm) values.Current;
+ norm.dirty = norm.rollbackDirty;
+ }
+ }
+ static SegmentReader()
+ {
+ {
+ try
+ {
+ System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
+ IMPL = System.Type.GetType(name);
+ }
+ catch (System.Security.SecurityException se)
+ {
+ try
+ {
+ IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("cannot load default SegmentReader class: " + e, e);
+ }
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("cannot load SegmentReader class: " + e, e);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file