You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by do...@apache.org on 2009/07/29 20:04:24 UTC
svn commit: r798995 [4/35] - in /incubator/lucene.net/trunk/C#/src:
Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/
Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/
Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net/...
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/CheckIndex.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CheckIndex.cs Wed Jul 29 18:04:12 2009
@@ -16,6 +16,7 @@
*/
using System;
+using System.Collections.Generic;
using Document = Lucene.Net.Documents.Document;
using Directory = Lucene.Net.Store.Directory;
@@ -24,359 +25,594 @@
namespace Lucene.Net.Index
{
-
- /// <summary> Basic tool to check the health of an index and write a
- /// new segments file that removes reference to problematic
- /// segments. There are many more checks that this tool
- /// could do but does not yet, eg: reconstructing a segments
- /// file by looking for all loadable segments (if no segments
- /// file is found), removing specifically specified segments,
- /// listing files that exist but are not referenced, etc.
- /// </summary>
-
- public class CheckIndex
- {
-
- public static System.IO.TextWriter out_Renamed;
-
- private class MySegmentTermDocs : SegmentTermDocs
- {
-
- internal int delCount;
-
- internal MySegmentTermDocs(SegmentReader p) : base(p)
- {
- }
-
- public override void Seek(Term term)
- {
- base.Seek(term);
- delCount = 0;
- }
-
- protected internal override void SkippingDoc()
- {
- delCount++;
- }
- }
-
- /// <summary>Returns true if index is clean, else false.</summary>
- public static bool Check(Directory dir, bool doFix)
- {
- System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
- SegmentInfos sis = new SegmentInfos();
-
- try
- {
- sis.Read(dir);
- }
- catch (System.Exception t)
- {
- out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
- out_Renamed.Write(t.StackTrace);
- out_Renamed.Flush();
- return false;
- }
-
- int numSegments = sis.Count;
- System.String segmentsFileName = sis.GetCurrentSegmentFileName();
- IndexInput input = null;
- try
- {
- input = dir.OpenInput(segmentsFileName);
- }
- catch (System.Exception t)
- {
- out_Renamed.WriteLine("ERROR: could not open segments file in directory");
- out_Renamed.Write(t.StackTrace);
- out_Renamed.Flush();
- return false;
- }
- int format = 0;
- try
- {
- format = input.ReadInt();
- }
- catch (System.Exception t)
- {
- out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
- out_Renamed.Write(t.StackTrace);
- out_Renamed.Flush();
- return false;
- }
- finally
- {
- if (input != null)
- input.Close();
- }
-
- System.String sFormat = "";
- bool skip = false;
-
- if (format == SegmentInfos.FORMAT)
- sFormat = "FORMAT [Lucene Pre-2.1]";
- if (format == SegmentInfos.FORMAT_LOCKLESS)
- sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
- else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
- sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
- else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
- sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
- else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
- {
- sFormat = "int=" + format + " [newer version of Lucene than this tool]";
- skip = true;
- }
- else
- {
- sFormat = format + " [Lucene 1.3 or prior]";
- }
-
- out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
-
- if (skip)
- {
- out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
- return false;
- }
-
- SegmentInfos newSIS = (SegmentInfos) sis.Clone();
- newSIS.Clear();
- bool changed = false;
- int totLoseDocCount = 0;
- int numBadSegments = 0;
- for (int i = 0; i < numSegments; i++)
- {
- SegmentInfo info = sis.Info(i);
- out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
- int toLoseDocCount = info.docCount;
-
- SegmentReader reader = null;
-
- try
- {
- out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
- out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
- out_Renamed.WriteLine(String.Format(nf, " size (MB)={0:f}", new Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
- int docStoreOffset = info.GetDocStoreOffset();
- if (docStoreOffset != - 1)
- {
- out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
- out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
- out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
- }
- System.String delFileName = info.GetDelFileName();
- if (delFileName == null)
- out_Renamed.WriteLine(" no deletions");
- else
- out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
- out_Renamed.Write(" test: open reader.........");
- reader = SegmentReader.Get(info);
- int numDocs = reader.NumDocs();
- toLoseDocCount = numDocs;
- if (reader.HasDeletions())
- out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
- else
- out_Renamed.WriteLine("OK");
-
- out_Renamed.Write(" test: fields, norms.......");
- System.Collections.ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
- System.Collections.IEnumerator it = fieldNames.GetEnumerator();
- while (it.MoveNext())
- {
- System.String fieldName = (System.String) it.Current;
- byte[] b = reader.Norms(fieldName);
- if (b.Length != info.docCount)
- throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
- }
- out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");
-
- out_Renamed.Write(" test: terms, freq, prox...");
- TermEnum termEnum = reader.Terms();
- TermPositions termPositions = reader.TermPositions();
-
- // Used only to count up # deleted docs for this
- // term
- MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
-
- long termCount = 0;
- long totFreq = 0;
- long totPos = 0;
- while (termEnum.Next())
- {
- termCount++;
- Term term = termEnum.Term();
- int docFreq = termEnum.DocFreq();
- termPositions.Seek(term);
- int lastDoc = - 1;
- int freq0 = 0;
- totFreq += docFreq;
- while (termPositions.Next())
- {
- freq0++;
- int doc = termPositions.Doc();
- int freq = termPositions.Freq();
- if (doc <= lastDoc)
- {
- throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
- }
- lastDoc = doc;
- if (freq <= 0)
- {
- throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
- }
-
- int lastPos = - 1;
- totPos += freq;
- for (int j = 0; j < freq; j++)
- {
- int pos = termPositions.NextPosition();
- if (pos < -1)
- {
- throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
- }
- if (pos < lastPos)
- {
- throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
- }
- }
- }
-
- // Now count how many deleted docs occurred in
- // this term:
- int delCount;
- if (reader.HasDeletions())
- {
- myTermDocs.Seek(term);
- while (myTermDocs.Next())
- {
- }
- delCount = myTermDocs.delCount;
- }
- else
- delCount = 0;
-
- if (freq0 + delCount != docFreq)
- {
- throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
- }
- }
-
- out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
-
- out_Renamed.Write(" test: stored fields.......");
- int docCount = 0;
- long totFields = 0;
- for (int j = 0; j < info.docCount; j++)
- if (!reader.IsDeleted(j))
- {
- docCount++;
- Document doc = reader.Document(j);
- totFields += doc.GetFields().Count;
- }
-
- if (docCount != reader.NumDocs())
- throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
-
- out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new Object[] { totFields, (((float)totFields) / docCount) }));
-
- out_Renamed.Write(" test: term vectors........");
- int totVectors = 0;
- for (int j = 0; j < info.docCount; j++)
- if (!reader.IsDeleted(j))
- {
- TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
- if (tfv != null)
- totVectors += tfv.Length;
- }
-
- out_Renamed.WriteLine(String.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new Object[] { totVectors, (((float)totVectors) / docCount) }));
- out_Renamed.WriteLine("");
- }
- catch (System.Exception t)
- {
- out_Renamed.WriteLine("FAILED");
- System.String comment;
- if (doFix)
- comment = "will remove reference to this segment (-fix is specified)";
- else
- comment = "would remove reference to this segment (-fix was not specified)";
- out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
- out_Renamed.Write(t.StackTrace);
- out_Renamed.Flush();
- out_Renamed.WriteLine("");
- totLoseDocCount += toLoseDocCount;
- numBadSegments++;
- changed = true;
- continue;
- }
- finally
- {
- if (reader != null)
- reader.Close();
- }
-
- // Keeper
- newSIS.Add(info.Clone());
- }
-
- if (!changed)
- {
- out_Renamed.WriteLine("No problems were detected with this index.\n");
- return true;
- }
- else
- {
- out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
- if (doFix)
- out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
- else
- out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
- out_Renamed.WriteLine();
- }
-
- if (doFix)
- {
- out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
- for (int i = 0; i < 5; i++)
- {
- try
- {
- System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
- }
- catch (System.Threading.ThreadInterruptedException)
- {
- SupportClass.ThreadClass.Current().Interrupt();
- i--;
- continue;
- }
-
- out_Renamed.WriteLine(" " + (5 - i) + "...");
- }
- out_Renamed.Write("Writing...");
- try
- {
- newSIS.Write(dir);
- }
- catch (System.Exception t)
- {
- out_Renamed.WriteLine("FAILED; exiting");
- out_Renamed.Write(t.StackTrace);
- out_Renamed.Flush();
- return false;
- }
- out_Renamed.WriteLine("OK");
- out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
- }
- else
- {
- out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
- }
- out_Renamed.WriteLine("");
-
- return false;
- }
+ /// <summary>
+ /// Basic tool and API to check the health of an index and
+ /// write a new segments file that removes reference to
+ /// problematic segments.
+ ///
+ /// <p>As this tool checks every byte in the index, on a large
+ /// index it can take quite a long time to run.
+ ///
+ /// <p><b>WARNING</b>: this tool and API is new and
+ /// experimental and is subject to sudden change in the
+ /// next release. Please make a complete backup of your
+ /// index before using this to fix your index!
+ /// </summary>
+ public class CheckIndex
+ {
+ /// <summary>
+ /// Default print stream for all CheckIndex instances.
+ /// </summary>
+ [Obsolete("use SetInfoStream per instance instead")]
+ public static System.IO.TextWriter out_Renamed = null;
- static bool assertsOn;
+ private System.IO.TextWriter infoStream;
+ private Directory dir;
+
+ /**
+ * Returned from {@link #CheckIndex()} detailing the health and status of the index.
+ *
+ * <p><b>WARNING</b>: this API is new and experimental and is
+ * subject to sudden change in the next release.
+ **/
+
+ public class Status
+ {
+
+ /** True if no problems were found with the index. */
+ public bool clean;
+
+ /** True if we were unable to locate and load the segments_N file. */
+ public bool missingSegments;
+
+ /** True if we were unable to open the segments_N file. */
+ public bool cantOpenSegments;
+
+ /** True if we were unable to read the version number from segments_N file. */
+ public bool missingSegmentVersion;
+
+ /** Name of latest segments_N file in the index. */
+ public string segmentsFileName;
+
+ /** Number of segments in the index. */
+ public int numSegments;
+
+ /** string description of the version of the index. */
+ public string segmentFormat;
+
+ /** Empty unless you passed specific segments list to check as optional 3rd argument.
+ * @see CheckIndex#CheckIndex(List) */
+ //public IList<string> segmentsChecked = new List<string>();
+ public IList<object> segmentsChecked = new List<object>();
+
+ /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
+ public bool toolOutOfDate;
+
+ /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
+ public IList<SegmentInfoStatus> segmentInfos = new List<SegmentInfoStatus>();
+
+ /** Directory index is in. */
+ public Directory dir;
+
+ /** SegmentInfos instance containing only segments that
+ * had no problems (this is used with the {@link
+ * CheckIndex#fix} method to repair the index). */
+ internal SegmentInfos newSegments;
+
+ /** How many documents will be lost to bad segments. */
+ public int totLoseDocCount;
+
+ /** How many bad segments were found. */
+ public int numBadSegments;
+
+ /** True if we checked only specific segments ({@link
+ * #CheckIndex(List)}) was called with non-null
+ * argument). */
+ public bool partial;
+
+ /** Holds the status of each segment in the index.
+ * See {@link #segmentInfos}.
+ *
+ * <p><b>WARNING</b>: this API is new and experimental and is
+ * subject to sudden change in the next release.
+ */
+ public class SegmentInfoStatus
+ {
+ /** Name of the segment. */
+ public string name;
+
+ /** Document count (does not take deletions into account). */
+ public int docCount;
+
+ /** True if segment is compound file format. */
+ public bool compound;
+
+ /** Number of files referenced by this segment. */
+ public int numFiles;
+
+ /** Net size (MB) of the files referenced by this
+ * segment. */
+ public double sizeMB;
+
+ /** Doc store offset, if this segment shares the doc
+ * store files (stored fields and term vectors) with
+ * other segments. This is -1 if it does not share. */
+ public int docStoreOffset = -1;
+
+ /** string of the shared doc store segment, or null if
+ * this segment does not share the doc store files. */
+ public string docStoreSegment;
+
+ /** True if the shared doc store files are compound file
+ * format. */
+ public bool docStoreCompoundFile;
+
+ /** True if this segment has pending deletions. */
+ public bool hasDeletions;
+
+ /** Name of the current deletions file name. */
+ public string deletionsFileName;
+
+ /** Number of deleted documents. */
+ public int numDeleted;
+
+ /** True if we were able to open a SegmentReader on this
+ * segment. */
+ public bool openReaderPassed;
+
+ /** Number of fields in this segment. */
+ public int numFields;
+
+ /** True if at least one of the fields in this segment
+ * does not omitTf.
+ * @see Fieldable#setOmitTf */
+ public bool hasProx;
+ }
+ }
+
+ /** Create a new CheckIndex on the directory. */
+ public CheckIndex(Directory dir)
+ {
+ this.dir = dir;
+ infoStream = out_Renamed;
+ }
+
+ /** Set infoStream where messages should go. If null, no
+ * messages are printed */
+ public void SetInfoStream(System.IO.TextWriter out_Renamed)
+ {
+ infoStream = out_Renamed;
+ }
+
+ private void Msg(string msg)
+ {
+ if (infoStream != null)
+ infoStream.WriteLine(msg);
+ }
+
+
+ private class MySegmentTermDocs : SegmentTermDocs
+ {
+
+ internal int delCount;
+
+ internal MySegmentTermDocs(SegmentReader p)
+ : base(p)
+ {
+ }
+
+ public override void Seek(Term term)
+ {
+ base.Seek(term);
+ delCount = 0;
+ }
+
+ protected internal override void SkippingDoc()
+ {
+ delCount++;
+ }
+ }
+
+
+ /** Returns true if index is clean, else false.
+ * @deprecated Please instantiate a CheckIndex and then use {@link #CheckIndex()} instead */
+ public static bool Check(Directory dir, bool doFix)
+ {
+ return Check(dir, doFix, null);
+ }
+
+ /** Returns true if index is clean, else false.
+ * @deprecated Please instantiate a CheckIndex and then use {@link #CheckIndex(List)} instead */
+ public static bool Check(Directory dir, bool doFix, IList<object> onlySegments)
+ {
+ CheckIndex checker = new CheckIndex(dir);
+ Status status = checker.CheckIndex_Renamed(onlySegments);
+ if (doFix && !status.clean)
+ checker.FixIndex(status);
+
+ return status.clean;
+ }
+
+ /** Returns a {@link Status} instance detailing
+ * the state of the index.
+ *
+ * <p>As this method checks every byte in the index, on a large
+ * index it can take quite a long time to run.
+ *
+ * <p><b>WARNING</b>: make sure
+ * you only call this when the index is not opened by any
+ * writer. */
+ public Status CheckIndex_Renamed()
+ {
+ return CheckIndex_Renamed(null);
+ }
+
+ /** Returns a {@link Status} instance detailing
+ * the state of the index.
+ *
+ * @param onlySegments list of specific segment names to check
+ *
+ * <p>As this method checks every byte in the specified
+ * segments, on a large index it can take quite a long
+ * time to run.
+ *
+ * <p><b>WARNING</b>: make sure
+ * you only call this when the index is not opened by any
+ * writer. */
+ public Status CheckIndex_Renamed(IList<object> onlySegments)
+ {
+ System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
+ SegmentInfos sis = new SegmentInfos();
+ Status result = new Status();
+ result.dir = dir;
+ try
+ {
+ sis.Read(dir);
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not read any segments file in directory");
+ result.missingSegments = true;
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ return result;
+ }
+
+ int numSegments = sis.Count;
+ string segmentsFileName = sis.GetCurrentSegmentFileName();
+ IndexInput input = null;
+ try
+ {
+ input = dir.OpenInput(segmentsFileName);
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not open segments file in directory");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ result.cantOpenSegments = true;
+ return result;
+ }
+ int format = 0;
+ try
+ {
+ format = input.ReadInt();
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not read segment file version in directory");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ result.missingSegmentVersion = true;
+ return result;
+ }
+ finally
+ {
+ if (input != null)
+ input.Close();
+ }
+
+ string sFormat = "";
+ bool skip = false;
+
+ if (format == SegmentInfos.FORMAT)
+ sFormat = "FORMAT [Lucene Pre-2.1]";
+ if (format == SegmentInfos.FORMAT_LOCKLESS)
+ sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
+ else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
+ sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
+ else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
+ sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
+ else
+ {
+ if (format == SegmentInfos.FORMAT_CHECKSUM)
+ sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_DEL_COUNT)
+ sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_HAS_PROX)
+ sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
+ else if (format < SegmentInfos.CURRENT_FORMAT)
+ {
+ sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+ skip = true;
+ }
+ else
+ {
+ sFormat = format + " [Lucene 1.3 or prior]";
+ }
+ }
+
+ Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
+ result.segmentsFileName = segmentsFileName;
+ result.numSegments = numSegments;
+ result.segmentFormat = sFormat;
+
+ if (onlySegments != null)
+ {
+ result.partial = true;
+ if (infoStream != null)
+ infoStream.Write("\nChecking only these segments:");
+ IEnumerator<object> it = onlySegments.GetEnumerator();
+ while (it.MoveNext())
+ {
+ if (infoStream != null)
+ infoStream.Write(" " + it.Current);
+ }
+ SupportClass.CollectionsSupport.AddAll(onlySegments, (System.Collections.Generic.IList<object>)(result.segmentsChecked));
+ Msg(":");
+ }
+
+ if (skip)
+ {
+ Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
+ result.toolOutOfDate = true;
+ return result;
+ }
+
+
+ result.newSegments = (SegmentInfos)sis.Clone();
+ result.newSegments.Clear();
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = sis.Info(i);
+ if (onlySegments != null && !onlySegments.Contains(info.name))
+ continue;
+ Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
+ result.segmentInfos.Add(segInfoStat);
+ Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+ segInfoStat.name = info.name;
+ segInfoStat.docCount = info.docCount;
+
+ int toLoseDocCount = info.docCount;
+
+ SegmentReader reader = null;
+
+ try
+ {
+ Msg(" compound=" + info.GetUseCompoundFile());
+ segInfoStat.compound = info.GetUseCompoundFile();
+ Msg(" hasProx=" + info.GetHasProx());
+ segInfoStat.hasProx = info.GetHasProx();
+ Msg(" numFiles=" + info.Files().Count);
+ segInfoStat.numFiles = info.Files().Count;
+ //msg(" size (MB)=" + nf.Format(info.SizeInBytes()/(1024.*1024.)));
+ Msg(string.Format(nf, " size (MB)={0:f}", new object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
+ segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
+
+
+ int docStoreOffset = info.GetDocStoreOffset();
+ if (docStoreOffset != -1)
+ {
+ Msg(" docStoreOffset=" + docStoreOffset);
+ segInfoStat.docStoreOffset = docStoreOffset;
+ Msg(" docStoreSegment=" + info.GetDocStoreSegment());
+ segInfoStat.docStoreSegment = info.GetDocStoreSegment();
+ Msg(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
+ segInfoStat.docStoreCompoundFile = info.GetDocStoreIsCompoundFile();
+ }
+ string delFileName = info.GetDelFileName();
+ if (delFileName == null)
+ {
+ Msg(" no deletions");
+ segInfoStat.hasDeletions = false;
+ }
+ else
+ {
+ Msg(" has deletions [delFileName=" + delFileName + "]");
+ segInfoStat.hasDeletions = true;
+ segInfoStat.deletionsFileName = delFileName;
+ }
+ if (infoStream != null)
+ infoStream.Write(" test: open reader.........");
+ reader = SegmentReader.Get(info);
+ int numDocs = reader.NumDocs();
+ toLoseDocCount = numDocs;
+ if (reader.HasDeletions())
+ {
+ if (info.docCount - numDocs != info.GetDelCount())
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ segInfoStat.numDeleted = info.docCount - numDocs;
+ Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
+ }
+ else
+ {
+ if (info.GetDelCount() != 0)
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ Msg("OK");
+ }
+
+ if (infoStream != null)
+ infoStream.Write(" test: fields, norms.......");
+ ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
+ IEnumerator<string> it = fieldNames.GetEnumerator();
+ while (it.MoveNext())
+ {
+ string fieldName = it.Current;
+ byte[] b = reader.Norms(fieldName);
+ if (b.Length != info.docCount)
+ throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
+
+ }
+ Msg("OK [" + fieldNames.Count + " fields]");
+ segInfoStat.numFields = fieldNames.Count;
+ if (infoStream != null)
+ infoStream.Write(" test: terms, freq, prox...");
+ TermEnum termEnum = reader.Terms();
+ TermPositions termPositions = reader.TermPositions();
+
+ // Used only to count up # deleted docs for this
+ // term
+ MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
+
+ long termCount = 0;
+ long totFreq = 0;
+ long totPos = 0;
+ while (termEnum.Next())
+ {
+ termCount++;
+ Term term = termEnum.Term();
+ int docFreq = termEnum.DocFreq();
+ termPositions.Seek(term);
+ int lastDoc = -1;
+ int freq0 = 0;
+ totFreq += docFreq;
+ while (termPositions.Next())
+ {
+ freq0++;
+ int doc = termPositions.Doc();
+ int freq = termPositions.Freq();
+ if (doc <= lastDoc)
+ throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+ lastDoc = doc;
+ if (freq <= 0)
+ throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+
+ int lastPos = -1;
+ totPos += freq;
+ for (int j = 0; j < freq; j++)
+ {
+ int pos = termPositions.NextPosition();
+ if (pos < -1)
+ throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+ if (pos < lastPos)
+ throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+ }
+ }
+
+ // Now count how many deleted docs occurred in
+ // this term:
+ int delCount;
+ if (reader.HasDeletions())
+ {
+ myTermDocs.Seek(term);
+ while (myTermDocs.Next())
+ {
+ }
+ delCount = myTermDocs.delCount;
+ }
+ else
+ delCount = 0;
+
+ if (freq0 + delCount != docFreq)
+ throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+ }
+
+ Msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
+
+ if (infoStream != null)
+ infoStream.Write(" test: stored fields.......");
+ int docCount = 0;
+ long totFields = 0;
+ for (int j = 0; j < info.docCount; j++)
+ if (!reader.IsDeleted(j))
+ {
+ docCount++;
+ Document doc = reader.Document(j);
+ totFields += doc.GetFields().Count;
+ }
+
+ if (docCount != reader.NumDocs())
+ throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
+
+ //msg("OK [" + totFields + " total field count; avg " + nf.Format((((float) totFields)/docCount)) + " fields per doc]");
+ Msg(string.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { totFields, (((float)totFields) / docCount) }));
+
+ if (infoStream != null)
+ infoStream.Write(" test: term vectors........");
+ int totVectors = 0;
+ for (int j = 0; j < info.docCount; j++)
+ if (!reader.IsDeleted(j))
+ {
+ TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
+ if (tfv != null)
+ totVectors += tfv.Length;
+ }
+
+ //msg("OK [" + totVectors + " total vector count; avg " + nf.Format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");
+ Msg(string.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { totVectors, (((float)totVectors) / docCount) }));
+ Msg("");
+
+ }
+ catch (System.Exception t)
+ {
+ Msg("FAILED");
+ string comment;
+ comment = "FixIndex() would remove reference to this segment";
+ Msg(" WARNING: " + comment + "; full exception:");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ Msg("");
+ result.totLoseDocCount += toLoseDocCount;
+ result.numBadSegments++;
+ continue;
+ }
+ finally
+ {
+ if (reader != null)
+ reader.Close();
+ }
+
+ // Keeper
+ result.newSegments.Add(info.Clone());
+ }
+
+ if (0 == result.numBadSegments)
+ {
+ result.clean = true;
+ Msg("No problems were detected with this index.\n");
+ }
+ else
+ Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
+
+ return result;
+ }
+
+ /** Repairs the index using previously returned result
+ * from {@link #checkIndex}. Note that this does not
+ * remove any of the unreferenced files after it's done;
+ * you must separately open an {@link IndexWriter}, which
+ * deletes unreferenced files when it's created.
+ *
+ * <p><b>WARNING</b>: this writes a
+ * new segments file into the index, effectively removing
+ * all documents in broken segments from the index.
+ * BE CAREFUL.
+ *
+ * <p><b>WARNING</b>: Make sure you only call this when the
+ * index is not opened by any writer. */
+ public void FixIndex(Status result)
+ {
+ if (result.partial)
+ throw new ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
+ result.newSegments.Commit(result.dir);
+ }
+
+ private static bool assertsOn;
private static bool TestAsserts()
{
@@ -384,58 +620,552 @@
return true;
}
- [STAThread]
- public static void Main(System.String[] args)
- {
-
- bool doFix = false;
- for (int i = 0; i < args.Length; i++)
- if (args[i].Equals("-fix"))
- {
- doFix = true;
- break;
- }
-
- if (args.Length != (doFix ? 2 : 1))
- {
- out_Renamed.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has has any\n" + "corruption, else 0.\n");
- System.Environment.Exit(1);
- }
-
+ private static bool AssertsOn()
+ {
System.Diagnostics.Debug.Assert(TestAsserts());
- if (!assertsOn)
- System.Console.WriteLine("\nNote: testing will be more thorough if you run with System.Diagnostic.Debug.Assert() enabled.");
+ return assertsOn;
+ }
+
+ /** Command-line interface to check and fix an index.
+
+ <p>
+ Run it like this:
+ <pre>
+ java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+ </pre>
+ <ul>
+ <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments
+
+ <li><code>-segment X</code>: only check the specified
+ segment(s). This can be specified multiple times,
+ to check more than one segment, eg <code>-segment _2
+ -segment _a</code>. You can't use this with the -fix
+ option.
+ </ul>
+
+ <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
+ documents (perhaps many) to be permanently removed from the index. Always make
+ a backup copy of your index before running this! Do not run this tool on an index
+ that is actively being written to. You have been warned!
+
+ <p> Run without -fix, this tool will open the index, report version information
+ and report any exceptions it hits and what action it would take if -fix were
+ specified. With -fix, this tool will remove any segments that have issues and
+ write a new segments_N file. This means all documents contained in the affected
+ segments will be removed.
+
+ <p>
+ This tool exits with exit code 1 if the index cannot be opened or has any
+ corruption, else 0.
+ */
+ [STAThread]
+ public static void Main(string[] args)
+ {
+
+ bool doFix = false;
+ IList<object> onlySegments = new List<object>();
+ string indexPath = null;
+ int i = 0;
+ while (i < args.Length)
+ {
+ if (args[i].Equals("-fix"))
+ {
+ doFix = true;
+ i++;
+ }
+ else if (args[i].Equals("-segment"))
+ {
+ if (i == args.Length - 1)
+ {
+ System.Console.WriteLine("ERROR: missing name for -segment option");
+ System.Environment.Exit(1);
+ }
+ onlySegments.Add(args[i + 1]);
+ i += 2;
+ }
+ else
+ {
+ if (indexPath != null)
+ {
+ System.Console.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
+ System.Environment.Exit(1);
+ }
+ indexPath = args[i];
+ i++;
+ }
+ }
+
+ if (indexPath == null)
+ {
+ System.Console.WriteLine("\nERROR: index path not specified");
+ System.Console.WriteLine("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
+ "\n" +
+ " -fix: actually write a new segments_N file, removing any problematic segments\n" +
+ " -segment X: only check the specified segments. This can be specified multiple\n" +
+ " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
+ " You can't use this with the -fix option\n" +
+ "\n" +
+ "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
+ "documents (perhaps many) to be permanently removed from the index. Always make\n" +
+ "a backup copy of your index before running this! Do not run this tool on an index\n" +
+ "that is actively being written to. You have been warned!\n" +
+ "\n" +
+ "Run without -fix, this tool will open the index, report version information\n" +
+ "and report any exceptions it hits and what action it would take if -fix were\n" +
+ "specified. With -fix, this tool will remove any segments that have issues and\n" +
+ "write a new segments_N file. This means all documents contained in the affected\n" +
+ "segments will be removed.\n" +
+ "\n" +
+ "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
+ "corruption, else 0.\n");
+ System.Environment.Exit(1);
+ }
+
+ if (!AssertsOn())
+ System.Console.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
+
+ if (onlySegments.Count == 0)
+ onlySegments = null;
+ else if (doFix)
+ {
+ System.Console.WriteLine("ERROR: cannot specify both -fix and -segment");
+ System.Environment.Exit(1);
+ }
+
+ System.Console.WriteLine("\nOpening index @ " + indexPath + "\n");
+ Directory dir = null;
+ try
+ {
+ dir = FSDirectory.GetDirectory(indexPath);
+ }
+ catch (System.Exception t)
+ {
+ System.Console.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
+ System.Console.WriteLine(t.StackTrace);
+ System.Environment.Exit(1);
+ }
+
+ CheckIndex checker = new CheckIndex(dir);
+ checker.SetInfoStream(new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding));
+
+ Status result = checker.CheckIndex_Renamed(onlySegments);
+
+ if (!result.clean)
+ {
+ if (!doFix)
+ {
+ System.Console.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
+ }
+ else
+ {
+ System.Console.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+ System.Console.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+ for (int s = 0; s < 5; s++)
+ {
+ try
+ {
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64)1000 * 1000));
+ }
+ catch (System.Threading.ThreadInterruptedException)
+ {
+ SupportClass.ThreadClass.Current().Interrupt();
+ s--;
+ continue;
+ }
- System.String dirName = args[0];
- out_Renamed.WriteLine("\nOpening index @ " + dirName + "\n");
- Directory dir = null;
- try
- {
- dir = FSDirectory.GetDirectory(dirName);
- }
- catch (System.Exception t)
- {
- out_Renamed.WriteLine("ERROR: could not open directory \"" + dirName + "\"; exiting");
- out_Renamed.Write(t.StackTrace);
- out_Renamed.Flush();
- System.Environment.Exit(1);
- }
-
- bool isClean = Check(dir, doFix);
-
- int exitCode;
- if (isClean)
- exitCode = 0;
- else
- exitCode = 1;
- System.Environment.Exit(exitCode);
- }
- static CheckIndex()
- {
- System.IO.StreamWriter temp_writer;
- temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
- temp_writer.AutoFlush = true;
- out_Renamed = temp_writer;
- }
- }
+ System.Console.WriteLine(" " + (5 - s) + "...");
+ }
+ System.Console.WriteLine("Writing...");
+ checker.FixIndex(result);
+ System.Console.WriteLine("OK");
+ System.Console.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
+ }
+ }
+ System.Console.WriteLine("");
+
+ int exitCode;
+ if (result != null && result.clean == true)
+ exitCode = 0;
+ else
+ exitCode = 1;
+ System.Environment.Exit(exitCode);
+ }
+ }
}
+// /// <summary>Returns true if index is clean, else false.</summary>
+// public static bool Check(Directory dir, bool doFix)
+// {
+// System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
+// SegmentInfos sis = new SegmentInfos();
+
+// try
+// {
+// sis.Read(dir);
+// }
+// catch (System.Exception t)
+// {
+// out_Renamed.WriteLine("ERROR: could not read any segments file in directory");
+// out_Renamed.Write(t.StackTrace);
+// out_Renamed.Flush();
+// return false;
+// }
+
+// int numSegments = sis.Count;
+// System.string segmentsFileName = sis.GetCurrentSegmentFileName();
+// IndexInput input = null;
+// try
+// {
+// input = dir.OpenInput(segmentsFileName);
+// }
+// catch (System.Exception t)
+// {
+// out_Renamed.WriteLine("ERROR: could not open segments file in directory");
+// out_Renamed.Write(t.StackTrace);
+// out_Renamed.Flush();
+// return false;
+// }
+// int format = 0;
+// try
+// {
+// format = input.ReadInt();
+// }
+// catch (System.Exception t)
+// {
+// out_Renamed.WriteLine("ERROR: could not read segment file version in directory");
+// out_Renamed.Write(t.StackTrace);
+// out_Renamed.Flush();
+// return false;
+// }
+// finally
+// {
+// if (input != null)
+// input.Close();
+// }
+
+// System.string sFormat = "";
+// bool skip = false;
+
+// if (format == SegmentInfos.FORMAT)
+// sFormat = "FORMAT [Lucene Pre-2.1]";
+// if (format == SegmentInfos.FORMAT_LOCKLESS)
+// sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
+// else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
+// sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
+// else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
+// sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
+// else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE)
+// {
+// sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+// skip = true;
+// }
+// else
+// {
+// sFormat = format + " [Lucene 1.3 or prior]";
+// }
+
+// out_Renamed.WriteLine("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
+
+// if (skip)
+// {
+// out_Renamed.WriteLine("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
+// return false;
+// }
+
+// SegmentInfos newSIS = (SegmentInfos) sis.Clone();
+// newSIS.Clear();
+// bool changed = false;
+// int totLoseDocCount = 0;
+// int numBadSegments = 0;
+// for (int i = 0; i < numSegments; i++)
+// {
+// SegmentInfo info = sis.Info(i);
+// out_Renamed.WriteLine(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+// int toLoseDocCount = info.docCount;
+
+// SegmentReader reader = null;
+
+// try
+// {
+// out_Renamed.WriteLine(" compound=" + info.GetUseCompoundFile());
+// out_Renamed.WriteLine(" numFiles=" + info.Files().Count);
+// out_Renamed.WriteLine(string.Format(nf, " size (MB)={0:f}", new object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
+// int docStoreOffset = info.GetDocStoreOffset();
+// if (docStoreOffset != - 1)
+// {
+// out_Renamed.WriteLine(" docStoreOffset=" + docStoreOffset);
+// out_Renamed.WriteLine(" docStoreSegment=" + info.GetDocStoreSegment());
+// out_Renamed.WriteLine(" docStoreIsCompoundFile=" + info.GetDocStoreIsCompoundFile());
+// }
+// System.string delFileName = info.GetDelFileName();
+// if (delFileName == null)
+// out_Renamed.WriteLine(" no deletions");
+// else
+// out_Renamed.WriteLine(" has deletions [delFileName=" + delFileName + "]");
+// out_Renamed.Write(" test: open reader.........");
+// reader = SegmentReader.Get(info);
+// int numDocs = reader.NumDocs();
+// toLoseDocCount = numDocs;
+// if (reader.HasDeletions())
+// out_Renamed.WriteLine("OK [" + (info.docCount - numDocs) + " deleted docs]");
+// else
+// out_Renamed.WriteLine("OK");
+
+// out_Renamed.Write(" test: fields, norms.......");
+// System.Collections.ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
+// System.Collections.IEnumerator it = fieldNames.GetEnumerator();
+// while (it.MoveNext())
+// {
+// System.string fieldName = (System.string) it.Current;
+// byte[] b = reader.Norms(fieldName);
+// if (b.Length != info.docCount)
+// throw new System.SystemException("norms for field \"" + fieldName + "\" is length " + b.Length + " != maxDoc " + info.docCount);
+// }
+// out_Renamed.WriteLine("OK [" + fieldNames.Count + " fields]");
+
+// out_Renamed.Write(" test: terms, freq, prox...");
+// TermEnum termEnum = reader.Terms();
+// TermPositions termPositions = reader.TermPositions();
+
+// // Used only to count up # deleted docs for this
+// // term
+// MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
+
+// long termCount = 0;
+// long totFreq = 0;
+// long totPos = 0;
+// while (termEnum.Next())
+// {
+// termCount++;
+// Term term = termEnum.Term();
+// int docFreq = termEnum.DocFreq();
+// termPositions.Seek(term);
+// int lastDoc = - 1;
+// int freq0 = 0;
+// totFreq += docFreq;
+// while (termPositions.Next())
+// {
+// freq0++;
+// int doc = termPositions.Doc();
+// int freq = termPositions.Freq();
+// if (doc <= lastDoc)
+// {
+// throw new System.SystemException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
+// }
+// lastDoc = doc;
+// if (freq <= 0)
+// {
+// throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+// }
+
+// int lastPos = - 1;
+// totPos += freq;
+// for (int j = 0; j < freq; j++)
+// {
+// int pos = termPositions.NextPosition();
+// if (pos < -1)
+// {
+// throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+// }
+// if (pos < lastPos)
+// {
+// throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+// }
+// }
+// }
+
+// // Now count how many deleted docs occurred in
+// // this term:
+// int delCount;
+// if (reader.HasDeletions())
+// {
+// myTermDocs.Seek(term);
+// while (myTermDocs.Next())
+// {
+// }
+// delCount = myTermDocs.delCount;
+// }
+// else
+// delCount = 0;
+
+// if (freq0 + delCount != docFreq)
+// {
+// throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+// }
+// }
+
+// out_Renamed.WriteLine("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");
+
+// out_Renamed.Write(" test: stored fields.......");
+// int docCount = 0;
+// long totFields = 0;
+// for (int j = 0; j < info.docCount; j++)
+// if (!reader.IsDeleted(j))
+// {
+// docCount++;
+// Document doc = reader.Document(j);
+// totFields += doc.GetFields().Count;
+// }
+
+// if (docCount != reader.NumDocs())
+// throw new System.SystemException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");
+
+// out_Renamed.WriteLine(string.Format(nf, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { totFields, (((float)totFields) / docCount) }));
+
+// out_Renamed.Write(" test: term vectors........");
+// int totVectors = 0;
+// for (int j = 0; j < info.docCount; j++)
+// if (!reader.IsDeleted(j))
+// {
+// TermFreqVector[] tfv = reader.GetTermFreqVectors(j);
+// if (tfv != null)
+// totVectors += tfv.Length;
+// }
+
+// out_Renamed.WriteLine(string.Format(nf, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { totVectors, (((float)totVectors) / docCount) }));
+// out_Renamed.WriteLine("");
+// }
+// catch (System.Exception t)
+// {
+// out_Renamed.WriteLine("FAILED");
+// System.string comment;
+// if (doFix)
+// comment = "will remove reference to this segment (-fix is specified)";
+// else
+// comment = "would remove reference to this segment (-fix was not specified)";
+// out_Renamed.WriteLine(" WARNING: " + comment + "; full exception:");
+// out_Renamed.Write(t.StackTrace);
+// out_Renamed.Flush();
+// out_Renamed.WriteLine("");
+// totLoseDocCount += toLoseDocCount;
+// numBadSegments++;
+// changed = true;
+// continue;
+// }
+// finally
+// {
+// if (reader != null)
+// reader.Close();
+// }
+
+// // Keeper
+// newSIS.Add(info.Clone());
+// }
+
+// if (!changed)
+// {
+// out_Renamed.WriteLine("No problems were detected with this index.\n");
+// return true;
+// }
+// else
+// {
+// out_Renamed.WriteLine("WARNING: " + numBadSegments + " broken segments detected");
+// if (doFix)
+// out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents will be lost");
+// else
+// out_Renamed.WriteLine("WARNING: " + totLoseDocCount + " documents would be lost if -fix were specified");
+// out_Renamed.WriteLine();
+// }
+
+// if (doFix)
+// {
+// out_Renamed.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+// for (int i = 0; i < 5; i++)
+// {
+// try
+// {
+// System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
+// }
+// catch (System.Threading.ThreadInterruptedException)
+// {
+// SupportClass.ThreadClass.Current().Interrupt();
+// i--;
+// continue;
+// }
+
+// out_Renamed.WriteLine(" " + (5 - i) + "...");
+// }
+// out_Renamed.Write("Writing...");
+// try
+// {
+// newSIS.Write(dir);
+// }
+// catch (System.Exception t)
+// {
+// out_Renamed.WriteLine("FAILED; exiting");
+// out_Renamed.Write(t.StackTrace);
+// out_Renamed.Flush();
+// return false;
+// }
+// out_Renamed.WriteLine("OK");
+// out_Renamed.WriteLine("Wrote new segments file \"" + newSIS.GetCurrentSegmentFileName() + "\"");
+// }
+// else
+// {
+// out_Renamed.WriteLine("NOTE: would write new segments file [-fix was not specified]");
+// }
+// out_Renamed.WriteLine("");
+
+// return false;
+// }
+
+// static bool assertsOn;
+
+// private static bool TestAsserts()
+// {
+// assertsOn = true;
+// return true;
+// }
+
+// [STAThread]
+// public static void Main(System.string[] args)
+// {
+
+// bool doFix = false;
+// for (int i = 0; i < args.Length; i++)
+// if (args[i].Equals("-fix"))
+// {
+// doFix = true;
+// break;
+// }
+
+// if (args.Length != (doFix ? 2 : 1))
+// {
+// out_Renamed.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has has any\n" + "corruption, else 0.\n");
+// System.Environment.Exit(1);
+// }
+
+// System.Diagnostics.Debug.Assert(TestAsserts());
+// if (!assertsOn)
+// System.Console.WriteLine("\nNote: testing will be more thorough if you run with System.Diagnostic.Debug.Assert() enabled.");
+
+// System.string dirName = args[0];
+// out_Renamed.WriteLine("\nOpening index @ " + dirName + "\n");
+// Directory dir = null;
+// try
+// {
+// dir = FSDirectory.GetDirectory(dirName);
+// }
+// catch (System.Exception t)
+// {
+// out_Renamed.WriteLine("ERROR: could not open directory \"" + dirName + "\"; exiting");
+// out_Renamed.Write(t.StackTrace);
+// out_Renamed.Flush();
+// System.Environment.Exit(1);
+// }
+
+// bool isClean = Check(dir, doFix);
+
+// int exitCode;
+// if (isClean)
+// exitCode = 0;
+// else
+// exitCode = 1;
+// System.Environment.Exit(exitCode);
+// }
+// static CheckIndex()
+// {
+// System.IO.StreamWriter temp_writer;
+// temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
+// temp_writer.AutoFlush = true;
+// out_Renamed = temp_writer;
+// }
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/CompoundFileReader.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileReader.cs Wed Jul 29 18:04:12 2009
@@ -30,11 +30,7 @@
/// <summary> Class for accessing a compound stream.
/// This class implements a directory, but is limited to only read operations.
/// Directory methods that would normally modify data throw an exception.
- ///
- ///
/// </summary>
- /// <version> $Id: CompoundFileReader.java 564236 2007-08-09 15:21:19Z gsingers $
- /// </version>
public class CompoundFileReader : Directory
{
@@ -242,10 +238,19 @@
internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length, int readBufferSize) : base(readBufferSize)
{
- this.base_Renamed = base_Renamed;
+ this.base_Renamed = (IndexInput) base_Renamed.Clone();
this.fileOffset = fileOffset;
this.length = length;
}
+
+ public override object Clone()
+ {
+ CSIndexInput clone = (CSIndexInput)base.Clone();
+ clone.base_Renamed = (IndexInput)base_Renamed.Clone();
+ clone.fileOffset = fileOffset;
+ clone.length = length;
+ return clone;
+ }
/// <summary>Expert: implements buffer refill. Reads bytes from the current
/// position in the input.
@@ -258,15 +263,12 @@
/// </param>
protected internal override void ReadInternal(byte[] b, int offset, int len)
{
- lock (base_Renamed)
- {
- long start = GetFilePointer();
- if (start + len > length)
- throw new System.IO.IOException("read past EOF");
- base_Renamed.Seek(fileOffset + start);
- base_Renamed.ReadBytes(b, offset, len, false);
- }
- }
+ long start = GetFilePointer();
+ if (start + len > length)
+ throw new System.IO.IOException("read past EOF");
+ base_Renamed.Seek(fileOffset + start);
+ base_Renamed.ReadBytes(b, offset, len, false);
+ }
/// <summary>Expert: implements seek. Sets current position in this file, where
/// the next {@link #ReadInternal(byte[],int,int)} will occur.
@@ -280,6 +282,7 @@
/// <summary>Closes the stream to further operations. </summary>
public override void Close()
{
+ base_Renamed.Close();
}
public override long Length()
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/CompoundFileWriter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileWriter.cs Wed Jul 29 18:04:12 2009
@@ -46,8 +46,6 @@
///
///
/// </summary>
- /// <version> $Id: CompoundFileWriter.java 606441 2007-12-22 10:06:28Z mikemccand $
- /// </version>
public sealed class CompoundFileWriter
{
@@ -166,14 +164,25 @@
// Remember the positions of directory entries so that we can
// adjust the offsets later
System.Collections.IEnumerator it = entries.GetEnumerator();
+ long totalSize = 0;
while (it.MoveNext())
{
FileEntry fe = (FileEntry) it.Current;
fe.directoryOffset = os.GetFilePointer();
os.WriteLong(0); // for now
os.WriteString(fe.file);
+ totalSize += directory.FileLength(fe.file);
}
+ // Pre-allocate size of file as optimization --
+ // this can potentially help IO performances as
+ // we write the file and also later during
+ // searchin. It also uncovers a disk-full
+ // situation earlier and hopefully without
+ // actually filling disk to 100%:
+ long finalLength = totalSize + os.GetFilePointer();
+ os.SetLength(finalLength);
+
// Open the files and copy their data into the stream.
// Remember the locations of each file's data section.
byte[] buffer = new byte[16384];
@@ -193,7 +202,9 @@
os.Seek(fe.directoryOffset);
os.WriteLong(fe.dataOffset);
}
-
+
+ System.Diagnostics.Debug.Assert(finalLength == os.Length());
+
// Close the output stream. Set the os to null before trying to
// close so that if an exception occurs during the close, the
// finally clause below will not attempt to close the stream
@@ -234,7 +245,7 @@
while (remainder > 0)
{
int len = (int) System.Math.Min(chunk, remainder);
- is_Renamed.ReadBytes(buffer, 0, len);
+ is_Renamed.ReadBytes(buffer, 0, len, false);
os.WriteBytes(buffer, len);
remainder -= len;
if (checkAbort != null)