Posted to commits@lucenenet.apache.org by ar...@apache.org on 2008/06/25 04:52:24 UTC
svn commit: r671404 [7/10] -
/incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultiSegmentReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/MultiSegmentReader.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultiSegmentReader.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultiSegmentReader.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,802 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> An IndexReader which reads indexes with multiple segments.</summary>
+ class MultiSegmentReader : DirectoryIndexReader
+ {
+ protected internal SegmentReader[] subReaders;
+ private int[] starts; // 1st docno for each segment
+ private System.Collections.Hashtable normsCache = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
+ private int maxDoc = 0;
+ private int numDocs = - 1;
+ private bool hasDeletions = false;
+
+ /// <summary>Constructs a reader over the named set of segment readers. </summary>
+ internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory):base(directory, sis, closeDirectory)
+ {
+ // To reduce the chance of hitting FileNotFound
+ // (and having to retry), we open segments in
+ // reverse because IndexWriter merges & deletes
+ // the newest segments first.
+
+ SegmentReader[] readers = new SegmentReader[sis.Count];
+ for (int i = sis.Count - 1; i >= 0; i--)
+ {
+ try
+ {
+ readers[i] = SegmentReader.Get(sis.Info(i));
+ }
+ catch (System.IO.IOException e)
+ {
+ // Close all readers we had opened:
+ for (i++; i < sis.Count; i++)
+ {
+ try
+ {
+ readers[i].Close();
+ }
+ catch (System.IO.IOException ignore)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ throw e;
+ }
+ }
+
+ Initialize(readers);
+ }
+
+ /// <summary>This constructor is only used for {@link #Reopen()} </summary>
+ internal MultiSegmentReader(Directory directory, SegmentInfos infos, bool closeDirectory, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache):base(directory, infos, closeDirectory)
+ {
+
+ // we put the old SegmentReaders in a map, that allows us
+ // to lookup a reader using its segment name
+ System.Collections.IDictionary segmentReaders = new System.Collections.Hashtable();
+
+ if (oldReaders != null)
+ {
+ // create a Map SegmentName->SegmentReader
+ for (int i = 0; i < oldReaders.Length; i++)
+ {
+ segmentReaders[oldReaders[i].GetSegmentName()] = (System.Int32) i;
+ }
+ }
+
+ SegmentReader[] newReaders = new SegmentReader[infos.Count];
+
+ // remember which readers are shared between the old and the re-opened
+ // MultiSegmentReader - we have to incRef those readers
+ bool[] readerShared = new bool[infos.Count];
+
+ for (int i = infos.Count - 1; i >= 0; i--)
+ {
+ // find SegmentReader for this segment
+ Object oldReaderIndex = segmentReaders[infos.Info(i).name];
+ if (oldReaderIndex == null)
+ {
+ // this is a new segment, no old SegmentReader can be reused
+ newReaders[i] = null;
+ }
+ else
+ {
+ // there is an old reader for this segment - we'll try to reopen it
+ newReaders[i] = oldReaders[(System.Int32) oldReaderIndex];
+ }
+
+ bool success = false;
+ try
+ {
+ SegmentReader newReader;
+ if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile())
+ {
+ // this is a new reader; in case we hit an exception we can close it safely
+ newReader = SegmentReader.Get(infos.Info(i));
+ }
+ else
+ {
+ newReader = (SegmentReader) newReaders[i].ReopenSegment(infos.Info(i));
+ }
+ if (newReader == newReaders[i])
+ {
+ // this reader will be shared between the old and the new one,
+ // so we must incRef it
+ readerShared[i] = true;
+ newReader.IncRef();
+ }
+ else
+ {
+ readerShared[i] = false;
+ newReaders[i] = newReader;
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ for (i++; i < infos.Count; i++)
+ {
+ if (newReaders[i] != null)
+ {
+ try
+ {
+ if (!readerShared[i])
+ {
+ // this is a new subReader that is not used by the old one,
+ // we can close it
+ newReaders[i].Close();
+ }
+ else
+ {
+ // this subReader is also used by the old reader, so instead
+ // closing we must decRef it
+ newReaders[i].DecRef();
+ }
+ }
+ catch (System.IO.IOException ignore)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
+ Initialize(newReaders);
+
+ // try to copy unchanged norms from the old normsCache to the new one
+ if (oldNormsCache != null)
+ {
+ System.Collections.IEnumerator it = oldNormsCache.Keys.GetEnumerator();
+ while (it.MoveNext())
+ {
+ System.String field = (System.String) it.Current;
+ if (!HasNorms(field))
+ {
+ continue;
+ }
+
+ byte[] oldBytes = (byte[]) oldNormsCache[field];
+
+ byte[] bytes = new byte[MaxDoc()];
+
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ Object oldReaderIndex = segmentReaders[subReaders[i].GetSegmentName()];
+
+ // this SegmentReader was not re-opened, we can copy all of its norms
+ if (oldReaderIndex != null && (oldReaders[(System.Int32) oldReaderIndex] == subReaders[i] || oldReaders[(System.Int32) oldReaderIndex].norms[field] == subReaders[i].norms[field]))
+ {
+ // we don't have to synchronize here: either this constructor is called from a SegmentReader,
+ // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
+ // which is synchronized
+ Array.Copy(oldBytes, oldStarts[(System.Int32) oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
+ }
+ else
+ {
+ subReaders[i].Norms(field, bytes, starts[i]);
+ }
+ }
+
+ normsCache[field] = bytes; // update cache
+ }
+ }
+ }
+
+ private void Initialize(SegmentReader[] subReaders)
+ {
+ this.subReaders = subReaders;
+ starts = new int[subReaders.Length + 1]; // build starts array
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ starts[i] = maxDoc;
+ maxDoc += subReaders[i].MaxDoc(); // compute maxDocs
+
+ if (subReaders[i].HasDeletions())
+ hasDeletions = true;
+ }
+ starts[subReaders.Length] = maxDoc;
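+ // e.g. (hypothetical sizes): sub-readers with maxDocs 10, 15 and 15 give
+ // starts = {0, 10, 25, 40}; the final entry equals MaxDoc().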
+ }
+
+ protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ if (infos.Count == 1)
+ {
+ // The index has only one segment now, so we can't refresh the MultiSegmentReader.
+ // Return a new SegmentReader instead
+ SegmentReader newReader = SegmentReader.Get(infos, infos.Info(0), false);
+ return newReader;
+ }
+ else
+ {
+ return new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache);
+ }
+ }
+ }
+
+ public override TermFreqVector[] GetTermFreqVectors(int n)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment
+ }
+
+ public override TermFreqVector GetTermFreqVector(int n, System.String field)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVector(n - starts[i], field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper);
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper);
+ }
+
+ public override bool IsOptimized()
+ {
+ return false;
+ }
+
+ public override int NumDocs()
+ {
+ lock (this)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ if (numDocs == - 1)
+ {
+ // check cache
+ int n = 0; // cache miss--recompute
+ for (int i = 0; i < subReaders.Length; i++)
+ n += subReaders[i].NumDocs(); // sum from readers
+ numDocs = n;
+ }
+ return numDocs;
+ }
+ }
+
+ public override int MaxDoc()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return maxDoc;
+ }
+
+ // inherit javadoc
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader
+ }
+
+ public override bool HasDeletions()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return hasDeletions;
+ }
+
+ protected internal override void DoDelete(int n)
+ {
+ numDocs = - 1; // invalidate cache
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader
+ hasDeletions = true;
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].UndeleteAll();
+
+ hasDeletions = false;
+ numDocs = - 1; // invalidate cache
+ }
+
+ private int ReaderIndex(int n)
+ {
+ // find reader for doc n:
+ return ReaderIndex(n, this.starts, this.subReaders.Length);
+ }
+
+ internal static int ReaderIndex(int n, int[] starts, int numSubReaders)
+ {
+ // find reader for doc n:
+ int lo = 0; // search starts array
+ int hi = numSubReaders - 1; // find the last starts entry <= n
+
+ while (hi >= lo)
+ {
+ int mid = (lo + hi) >> 1;
+ int midValue = starts[mid];
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else
+ {
+ // found a match
+ while (mid + 1 < numSubReaders && starts[mid + 1] == midValue)
+ {
+ mid++; // scan to last match
+ }
+ return mid;
+ }
+ }
+ return hi;
+ }
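+
+ // Worked example (hypothetical numbers): with starts = {0, 10, 25, 40} and
+ // numSubReaders == 3, ReaderIndex(17, starts, 3) returns 1, and callers use
+ // 17 - starts[1] == 7 as the segment-local doc number. The scan-to-last-match
+ // loop above matters only when empty segments produce duplicate start values.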
+
+ public override bool HasNorms(System.String field)
+ {
+ EnsureOpen();
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ if (subReaders[i].HasNorms(field))
+ return true;
+ }
+ return false;
+ }
+
+ private byte[] ones;
+ private byte[] fakeNorms()
+ {
+ if (ones == null)
+ ones = SegmentReader.CreateFakeNorms(MaxDoc());
+ return ones;
+ }
+
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = (byte[]) normsCache[field];
+ if (bytes != null)
+ return bytes; // cache hit
+ if (!HasNorms(field))
+ return fakeNorms();
+
+ bytes = new byte[MaxDoc()];
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].Norms(field, bytes, starts[i]);
+ normsCache[field] = bytes; // update cache
+ return bytes;
+ }
+ }
+
+ public override void Norms(System.String field, byte[] result, int offset)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = (byte[]) normsCache[field];
+ if (bytes == null && !HasNorms(field))
+ bytes = fakeNorms();
+ if (bytes != null)
+ {
+ // cache hit: the segments need not be consulted again
+ Array.Copy(bytes, 0, result, offset, MaxDoc());
+ return;
+ }
+
+ for (int i = 0; i < subReaders.Length; i++)
+ // read from segments
+ subReaders[i].Norms(field, result, offset + starts[i]);
+ }
+ }
+
+ protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
+ {
+ normsCache.Remove(field); // clear cache
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return new MultiTermEnum(subReaders, starts, null);
+ }
+
+ public override TermEnum Terms(Term term)
+ {
+ EnsureOpen();
+ return new MultiTermEnum(subReaders, starts, term);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ EnsureOpen();
+ int total = 0; // sum freqs in segments
+ for (int i = 0; i < subReaders.Length; i++)
+ total += subReaders[i].DocFreq(t);
+ return total;
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return new MultiTermDocs(subReaders, starts);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return new MultiTermPositions(subReaders, starts);
+ }
+
+ protected internal override void CommitChanges()
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].Commit();
+ }
+
+ internal override void StartCommit()
+ {
+ base.StartCommit();
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ subReaders[i].StartCommit();
+ }
+ }
+
+ internal override void RollbackCommit()
+ {
+ base.RollbackCommit();
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ subReaders[i].RollbackCommit();
+ }
+ }
+
+ protected internal override void DoClose()
+ {
+ lock (this)
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].DecRef();
+
+ // maybe close directory
+ base.DoClose();
+ }
+ }
+
+ public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
+ {
+ EnsureOpen();
+ return GetFieldNames(fieldNames, this.subReaders);
+ }
+
+ internal static System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames, IndexReader[] subReaders)
+ {
+ // maintain a unique set of field names
+ System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ IndexReader reader = subReaders[i];
+ System.Collections.IEnumerator names = reader.GetFieldNames(fieldNames).GetEnumerator();
+ while (names.MoveNext())
+ {
+ fieldSet[names.Current] = names.Current; // indexer assignment: a field may occur in several sub-readers
+ }
+ }
+ return fieldSet.Keys;
+ }
+
+ // for testing
+ internal virtual SegmentReader[] GetSubReaders()
+ {
+ return subReaders;
+ }
+
+ public override void SetTermInfosIndexDivisor(int indexDivisor)
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].SetTermInfosIndexDivisor(indexDivisor);
+ }
+
+ public override int GetTermInfosIndexDivisor()
+ {
+ if (subReaders.Length > 0)
+ return subReaders[0].GetTermInfosIndexDivisor();
+ else
+ throw new System.SystemException("no readers");
+ }
+
+ internal class MultiTermEnum:TermEnum
+ {
+ private SegmentMergeQueue queue;
+
+ private Term term;
+ private int docFreq;
+
+ public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
+ {
+ queue = new SegmentMergeQueue(readers.Length);
+ for (int i = 0; i < readers.Length; i++)
+ {
+ IndexReader reader = readers[i];
+ TermEnum termEnum;
+
+ if (t != null)
+ {
+ termEnum = reader.Terms(t);
+ }
+ else
+ termEnum = reader.Terms();
+
+ SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
+ if (t == null ? smi.Next() : termEnum.Term() != null)
+ queue.Put(smi); // initialize queue
+ else
+ smi.Close();
+ }
+
+ if (t != null && queue.Size() > 0)
+ {
+ Next();
+ }
+ }
+
+ public override bool Next()
+ {
+ SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
+ if (top == null)
+ {
+ term = null;
+ return false;
+ }
+
+ term = top.term;
+ docFreq = 0;
+
+ while (top != null && term.CompareTo(top.term) == 0)
+ {
+ queue.Pop();
+ docFreq += top.termEnum.DocFreq(); // increment freq
+ if (top.Next())
+ queue.Put(top); // restore queue
+ else
+ top.Close(); // done with a segment
+ top = (SegmentMergeInfo) queue.Top();
+ }
+ return true;
+ }
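+
+ // Example (hypothetical): if segment A holds "foo" with docFreq 2 and
+ // segment B holds "foo" with docFreq 3, a single Next() call pops both
+ // queue entries and reports term "foo" with docFreq 5.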
+
+ public override Term Term()
+ {
+ return term;
+ }
+
+ public override int DocFreq()
+ {
+ return docFreq;
+ }
+
+ public override void Close()
+ {
+ queue.Close();
+ }
+ }
+
+ internal class MultiTermDocs : TermDocs
+ {
+ protected internal IndexReader[] readers;
+ protected internal int[] starts;
+ protected internal Term term;
+
+ protected internal int base_Renamed = 0;
+ protected internal int pointer = 0;
+
+ private TermDocs[] readerTermDocs;
+ protected internal TermDocs current; // == readerTermDocs[pointer]
+
+ public MultiTermDocs(IndexReader[] r, int[] s)
+ {
+ readers = r;
+ starts = s;
+
+ readerTermDocs = new TermDocs[r.Length];
+ }
+
+ public virtual int Doc()
+ {
+ return base_Renamed + current.Doc();
+ }
+ public virtual int Freq()
+ {
+ return current.Freq();
+ }
+
+ public virtual void Seek(Term term)
+ {
+ this.term = term;
+ this.base_Renamed = 0;
+ this.pointer = 0;
+ this.current = null;
+ }
+
+ public virtual void Seek(TermEnum termEnum)
+ {
+ Seek(termEnum.Term());
+ }
+
+ public virtual bool Next()
+ {
+ for (; ; )
+ {
+ if (current != null && current.Next())
+ {
+ return true;
+ }
+ else if (pointer < readers.Length)
+ {
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ /// <summary>Optimized implementation. </summary>
+ public virtual int Read(int[] docs, int[] freqs)
+ {
+ while (true)
+ {
+ while (current == null)
+ {
+ if (pointer < readers.Length)
+ {
+ // try next segment
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ int end = current.Read(docs, freqs);
+ if (end == 0)
+ {
+ // none left in segment
+ current = null;
+ }
+ else
+ {
+ // got some
+ int b = base_Renamed; // adjust doc numbers
+ for (int i = 0; i < end; i++)
+ docs[i] += b;
+ return end;
+ }
+ }
+ }
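+
+ // Example (hypothetical): with base_Renamed == 25, segment-local docs
+ // {0, 2, 5} come back to the caller as composite docs {25, 27, 30}.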
+
+ /* A possible future optimization could skip entire segments */
+ public virtual bool SkipTo(int target)
+ {
+ for (; ; )
+ {
+ if (current != null && current.SkipTo(target - base_Renamed))
+ {
+ return true;
+ }
+ else if (pointer < readers.Length)
+ {
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ return false;
+ }
+ }
+
+ private TermDocs TermDocs(int i)
+ {
+ if (term == null)
+ return null;
+ TermDocs result = readerTermDocs[i];
+ if (result == null)
+ result = readerTermDocs[i] = TermDocs(readers[i]);
+ result.Seek(term);
+ return result;
+ }
+
+ protected internal virtual TermDocs TermDocs(IndexReader reader)
+ {
+ return reader.TermDocs();
+ }
+
+ public virtual void Close()
+ {
+ for (int i = 0; i < readerTermDocs.Length; i++)
+ {
+ if (readerTermDocs[i] != null)
+ readerTermDocs[i].Close();
+ }
+ }
+ }
+
+ internal class MultiTermPositions:MultiTermDocs, TermPositions
+ {
+ public MultiTermPositions(IndexReader[] r, int[] s):base(r, s)
+ {
+ }
+
+ protected internal override TermDocs TermDocs(IndexReader reader)
+ {
+ return (TermDocs) reader.TermPositions();
+ }
+
+ public virtual int NextPosition()
+ {
+ return ((TermPositions) current).NextPosition();
+ }
+
+ public virtual int GetPayloadLength()
+ {
+ return ((TermPositions) current).GetPayloadLength();
+ }
+
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ return ((TermPositions) current).GetPayload(data, offset);
+ }
+
+
+ // TODO: Remove warning after API has been finalized
+ public virtual bool IsPayloadAvailable()
+ {
+ return ((TermPositions) current).IsPayloadAvailable();
+ }
+ }
+ }
+}
\ No newline at end of file
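
The dispatch idiom above, as a minimal self-contained C# sketch: a composite
doc number is mapped to a sub-reader with a binary search over the starts
array (cf. ReaderIndex), then translated to a segment-local doc number. The
segment sizes here are hypothetical:

    // Three segments of sizes 10, 15 and 15, as MultiSegmentReader sees them.
    int[] starts = { 0, 10, 25, 40 };    // starts[i] = first docno of segment i
    int n = 27;                          // composite doc number

    int lo = 0, hi = starts.Length - 2;  // search for the last starts[i] <= n
    while (hi >= lo)
    {
        int mid = (lo + hi) >> 1;
        if (n < starts[mid]) hi = mid - 1;
        else if (n > starts[mid]) lo = mid + 1;
        else { hi = mid; break; }        // simplified: assumes no empty segments
    }
    int segment = hi;                    // 2
    int localDoc = n - starts[segment];  // 27 - 25 == 2
    // Document(n) then dispatches to subReaders[segment].Document(localDoc).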
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultipleTermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/MultipleTermPositions.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultipleTermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultipleTermPositions.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
*/
using System;
+
using PriorityQueue = Lucene.Net.Util.PriorityQueue;
namespace Lucene.Net.Index
@@ -217,5 +218,29 @@
{
throw new System.NotSupportedException();
}
+
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> System.NotSupportedException </throws>
+ public virtual int GetPayloadLength()
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Not implemented.</summary>
+ /// <throws> System.NotSupportedException </throws>
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary> Payloads are not supported by this class; this always returns false.</summary>
+ /// <returns> false
+ /// </returns>
+ // TODO: Remove warning after API has been finalized
+ public virtual bool IsPayloadAvailable()
+ {
+ return false;
+ }
}
}
\ No newline at end of file
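
For context, MultipleTermPositions presents the merged postings of several
terms in one field as a single TermPositions stream, and the methods added
above make explicit that payloads are unsupported on such a merged stream. A
minimal usage sketch, assuming the class's (IndexReader, Term[]) constructor
and a hypothetical index path, field and terms:

    // assumes: using Lucene.Net.Index;
    IndexReader reader = IndexReader.Open("some-index");
    Term[] synonyms = new Term[] { new Term("body", "quick"), new Term("body", "fast") };
    TermPositions tp = new MultipleTermPositions(reader, synonyms);
    while (tp.Next())
    {
        int doc = tp.Doc();
        for (int j = 0; j < tp.Freq(); j++)
        {
            int pos = tp.NextPosition();  // positions of either term, merged
        }
    }
    tp.Close();
    // GetPayloadLength()/GetPayload() throw System.NotSupportedException here,
    // and IsPayloadAvailable() always returns false.
    reader.Close();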
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Package.html
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Package.html?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Package.html (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Package.html Tue Jun 24 19:52:22 2008
@@ -1,10 +1,10 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
- <meta name="Author" content="Doug Cutting">
-</head>
-<body>
-Code to maintain and access indices.
-</body>
-</html>
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+ <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+Code to maintain and access indices.
+</body>
+</html>
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ParallelReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/ParallelReader.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ParallelReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ParallelReader.cs Tue Jun 24 19:52:22 2008
@@ -16,10 +16,11 @@
*/
using System;
+
using Document = Lucene.Net.Documents.Document;
-using Fieldable = Lucene.Net.Documents.Fieldable;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
+using Fieldable = Lucene.Net.Documents.Fieldable;
namespace Lucene.Net.Index
{
@@ -45,22 +46,37 @@
public class ParallelReader : IndexReader
{
private System.Collections.ArrayList readers = new System.Collections.ArrayList();
+ private System.Collections.IList decrefOnClose = new System.Collections.ArrayList(); // remember which subreaders to decRef on close
+ internal bool incRefReaders = false;
private System.Collections.SortedList fieldToReader = new System.Collections.SortedList();
private System.Collections.IDictionary readerToFields = new System.Collections.Hashtable();
- private System.Collections.ArrayList storedFieldReaders = new System.Collections.ArrayList();
+ private System.Collections.IList storedFieldReaders = new System.Collections.ArrayList();
private int maxDoc;
private int numDocs;
private bool hasDeletions;
+ /// <summary>Construct a ParallelReader.
+ /// <p>Note that all subreaders are closed if this ParallelReader is closed.</p>
+ /// </summary>
+ public ParallelReader() : this(true)
+ {
+ }
+
/// <summary>Construct a ParallelReader. </summary>
- public ParallelReader() : base(null)
+ /// <param name="closeSubReaders">indicates whether the subreaders should be closed
+ /// when this ParallelReader is closed
+ /// </param>
+ public ParallelReader(bool closeSubReaders) : base()
{
+ this.incRefReaders = !closeSubReaders;
}
/// <summary>Add an IndexReader. </summary>
+ /// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader)
{
+ EnsureOpen();
Add(reader, false);
}
@@ -75,9 +91,11 @@
/// <throws> IllegalArgumentException if not all indexes have the same value
/// of {@link IndexReader#MaxDoc()}
/// </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
+ EnsureOpen();
if (readers.Count == 0)
{
this.maxDoc = reader.MaxDoc();
@@ -107,26 +125,143 @@
if (!ignoreStoredFields)
storedFieldReaders.Add(reader); // add to storedFieldReaders
readers.Add(reader);
+
+ if (incRefReaders)
+ {
+ reader.IncRef();
+ }
+ decrefOnClose.Add(incRefReaders);
}
+ /// <summary> Tries to reopen the subreaders.
+ /// <br>
+ /// If one or more subreaders could be re-opened (i.e. subReader.reopen()
+ /// returned a new instance != subReader), then a new ParallelReader instance
+ /// is returned, otherwise this instance is returned.
+ /// <p>
+ /// A re-opened instance might share one or more subreaders with the old
+ /// instance. Index modification operations result in undefined behavior
+ /// when performed before the old instance is closed.
+ /// (see {@link IndexReader#Reopen()}).
+ /// <p>
+ /// If subreaders are shared, then the reference count of those
+ /// readers is increased to ensure that the subreaders remain open
+ /// until the last referring reader is closed.
+ ///
+ /// </summary>
+ /// <throws> CorruptIndexException if the index is corrupt </throws>
+ /// <throws> IOException if there is a low-level IO error </throws>
+ public override IndexReader Reopen()
+ {
+ EnsureOpen();
+
+ bool reopened = false;
+ System.Collections.IList newReaders = new System.Collections.ArrayList();
+ System.Collections.IList newDecrefOnClose = new System.Collections.ArrayList();
+
+ bool success = false;
+
+ try
+ {
+
+ for (int i = 0; i < readers.Count; i++)
+ {
+ IndexReader oldReader = (IndexReader) readers[i];
+ IndexReader newReader = oldReader.Reopen();
+ newReaders.Add(newReader);
+ // if at least one of the subreaders was updated we remember that
+ // and return a new ParallelReader
+ if (newReader != oldReader)
+ {
+ reopened = true;
+ }
+ }
+
+ if (reopened)
+ {
+ ParallelReader pr = new ParallelReader();
+ for (int i = 0; i < readers.Count; i++)
+ {
+ IndexReader oldReader = (IndexReader) readers[i];
+ IndexReader newReader = (IndexReader) newReaders[i];
+ if (newReader == oldReader)
+ {
+ newDecrefOnClose.Add(true);
+ newReader.IncRef();
+ }
+ else
+ {
+ // this is a new subreader instance, so on close() we don't
+ // decRef but close it
+ newDecrefOnClose.Add(false);
+ }
+ pr.Add(newReader, !storedFieldReaders.Contains(oldReader));
+ }
+ pr.decrefOnClose = newDecrefOnClose;
+ pr.incRefReaders = incRefReaders;
+ success = true;
+ return pr;
+ }
+ else
+ {
+ success = true;
+ // No subreader was refreshed
+ return this;
+ }
+ }
+ finally
+ {
+ if (!success && reopened)
+ {
+ for (int i = 0; i < newReaders.Count; i++)
+ {
+ IndexReader r = (IndexReader) newReaders[i];
+ if (r != null)
+ {
+ try
+ {
+ if (((System.Boolean) newDecrefOnClose[i]))
+ {
+ r.DecRef();
+ }
+ else
+ {
+ r.Close();
+ }
+ }
+ catch (System.IO.IOException ignore)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+ }
+
+
public override int NumDocs()
{
+ // Don't call ensureOpen() here (it could affect performance)
return numDocs;
}
public override int MaxDoc()
{
+ // Don't call ensureOpen() here (it could affect performance)
return maxDoc;
}
public override bool HasDeletions()
{
+ // Don't call ensureOpen() here (it could affect performance)
return hasDeletions;
}
// check first reader
public override bool IsDeleted(int n)
{
+ // Don't call ensureOpen() here (it could affect performance)
if (readers.Count > 0)
return ((IndexReader) readers[0]).IsDeleted(n);
return false;
@@ -155,6 +290,7 @@
// append fields from storedFieldReaders
public override Document Document(int n, FieldSelector fieldSelector)
{
+ EnsureOpen();
Document result = new Document();
for (int i = 0; i < storedFieldReaders.Count; i++)
{
@@ -188,6 +324,7 @@
// get all vectors
public override TermFreqVector[] GetTermFreqVectors(int n)
{
+ EnsureOpen();
System.Collections.ArrayList results = new System.Collections.ArrayList();
System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator();
while (i.MoveNext())
@@ -204,24 +341,54 @@
public override TermFreqVector GetTermFreqVector(int n, System.String field)
{
+ EnsureOpen();
IndexReader reader = ((IndexReader) fieldToReader[field]);
return reader == null ? null : reader.GetTermFreqVector(n, field);
}
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ IndexReader reader = ((IndexReader) fieldToReader[field]);
+ if (reader != null)
+ {
+ reader.GetTermFreqVector(docNumber, field, mapper);
+ }
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+
+ System.Collections.IEnumerator i = fieldToReader.GetEnumerator();
+ while (i.MoveNext())
+ {
+ System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current;
+ System.String field = (System.String) e.Key;
+ IndexReader reader = (IndexReader) e.Value;
+ reader.GetTermFreqVector(docNumber, field, mapper);
+ }
+ }
+
public override bool HasNorms(System.String field)
{
+ EnsureOpen();
IndexReader reader = ((IndexReader) fieldToReader[field]);
return reader == null ? false : reader.HasNorms(field);
}
public override byte[] Norms(System.String field)
{
+ EnsureOpen();
IndexReader reader = ((IndexReader) fieldToReader[field]);
return reader == null ? null : reader.Norms(field);
}
public override void Norms(System.String field, byte[] result, int offset)
{
+ EnsureOpen();
IndexReader reader = ((IndexReader) fieldToReader[field]);
if (reader != null)
reader.Norms(field, result, offset);
@@ -236,40 +403,91 @@
public override TermEnum Terms()
{
+ EnsureOpen();
return new ParallelTermEnum(this);
}
public override TermEnum Terms(Term term)
{
+ EnsureOpen();
return new ParallelTermEnum(this, term);
}
public override int DocFreq(Term term)
{
+ EnsureOpen();
IndexReader reader = ((IndexReader) fieldToReader[term.Field()]);
return reader == null ? 0 : reader.DocFreq(term);
}
public override TermDocs TermDocs(Term term)
{
+ EnsureOpen();
return new ParallelTermDocs(this, term);
}
public override TermDocs TermDocs()
{
+ EnsureOpen();
return new ParallelTermDocs(this);
}
public override TermPositions TermPositions(Term term)
{
+ EnsureOpen();
return new ParallelTermPositions(this, term);
}
public override TermPositions TermPositions()
{
+ EnsureOpen();
return new ParallelTermPositions(this);
}
+ /// <summary> Checks recursively if all subreaders are up to date. </summary>
+ public override bool IsCurrent()
+ {
+ for (int i = 0; i < readers.Count; i++)
+ {
+ if (!((IndexReader) readers[i]).IsCurrent())
+ {
+ return false;
+ }
+ }
+
+ // all subreaders are up to date
+ return true;
+ }
+
+ /// <summary> Checks recursively if all subindexes are optimized </summary>
+ public override bool IsOptimized()
+ {
+ for (int i = 0; i < readers.Count; i++)
+ {
+ if (!((IndexReader) readers[i]).IsOptimized())
+ {
+ return false;
+ }
+ }
+
+ // all subindexes are optimized
+ return true;
+ }
+
+
+ /// <summary>Not implemented.</summary>
+ /// <throws> System.NotSupportedException </throws>
+ public override long GetVersion()
+ {
+ throw new System.NotSupportedException("ParallelReader does not support this method.");
+ }
+
+ // for testing
+ internal virtual IndexReader[] GetSubReaders()
+ {
+ return (IndexReader[]) readers.ToArray(typeof(IndexReader));
+ }
+
protected internal override void DoCommit()
{
for (int i = 0; i < readers.Count; i++)
@@ -281,13 +499,22 @@
lock (this)
{
for (int i = 0; i < readers.Count; i++)
- ((IndexReader) readers[i]).Close();
+ {
+ if (((System.Boolean) decrefOnClose[i]))
+ {
+ ((IndexReader) readers[i]).DecRef();
+ }
+ else
+ {
+ ((IndexReader) readers[i]).Close();
+ }
+ }
}
}
-
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
{
+ EnsureOpen();
System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
for (int i = 0; i < readers.Count; i++)
{
@@ -512,6 +739,23 @@
// It is an error to call this if there is no next position, e.g. if termDocs==null
return ((TermPositions) termDocs).NextPosition();
}
+
+ public virtual int GetPayloadLength()
+ {
+ return ((TermPositions) termDocs).GetPayloadLength();
+ }
+
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ return ((TermPositions) termDocs).GetPayload(data, offset);
+ }
+
+
+ // TODO: Remove warning after API has been finalized
+ public virtual bool IsPayloadAvailable()
+ {
+ return ((TermPositions) termDocs).IsPayloadAvailable();
+ }
}
}
}
\ No newline at end of file
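
A minimal usage sketch of the reference-counting behavior added above,
assuming two parallel indexes with identical document numbering (the index
paths are hypothetical):

    // assumes: using Lucene.Net.Index;
    IndexReader small = IndexReader.Open("index-changing-fields");
    IndexReader big = IndexReader.Open("index-stable-fields");

    ParallelReader pr = new ParallelReader(false);  // don't close subreaders
    pr.Add(small);  // IncRef'd, because incRefReaders == true
    pr.Add(big);

    IndexReader reopened = pr.Reopen();  // may share unchanged subreaders
    if (reopened != pr)
        pr.Close();  // shared subreaders are DecRef'd, not closed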
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Payload.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Payload.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Payload.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Payload.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Token = Lucene.Net.Analysis.Token;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> A Payload is metadata that can be stored together with each occurrence
+ /// of a term. This metadata is stored inline in the posting list of the
+ /// specific term.
+ /// <p>
+ /// To store payloads in the index a {@link TokenStream} has to be used that
+ /// produces {@link Token}s containing payload data.
+ /// <p>
+ /// Use {@link TermPositions#GetPayloadLength()} and {@link TermPositions#GetPayload(byte[], int)}
+ /// to retrieve the payloads from the index.<br>
+ ///
+ /// </summary>
+ [Serializable]
+ public class Payload : System.ICloneable
+ {
+ /// <summary>the byte array containing the payload data </summary>
+ protected internal byte[] data;
+
+ /// <summary>the offset within the byte array </summary>
+ protected internal int offset;
+
+ /// <summary>the length of the payload data </summary>
+ protected internal int length;
+
+ /// <summary>Creates an empty payload and does not allocate a byte array. </summary>
+ public Payload()
+ {
+ // nothing to do
+ }
+
+ /// <summary> Creates a new payload with the given array as data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ ///
+ /// </summary>
+ /// <param name="data">the data of this payload
+ /// </param>
+ public Payload(byte[] data):this(data, 0, data.Length)
+ {
+ }
+
+ /// <summary> Creates a new payload with the given array as data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ ///
+ /// </summary>
+ /// <param name="data">the data of this payload
+ /// </param>
+ /// <param name="offset">the offset in the data byte array
+ /// </param>
+ /// <param name="length">the length of the data
+ /// </param>
+ public Payload(byte[] data, int offset, int length)
+ {
+ if (offset < 0 || offset + length > data.Length)
+ {
+ throw new System.ArgumentException();
+ }
+ this.data = data;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ /// <summary> Sets this payload's data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ /// </summary>
+ public virtual void SetData(byte[] data)
+ {
+ SetData(data, 0, data.Length);
+ }
+
+ /// <summary> Sets this payload's data.
+ /// A reference to the passed-in array is held, i.e. no
+ /// copy is made.
+ /// </summary>
+ public virtual void SetData(byte[] data, int offset, int length)
+ {
+ this.data = data;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ /// <summary> Returns a reference to the underlying byte array
+ /// that holds this payload's data.
+ /// </summary>
+ public virtual byte[] GetData()
+ {
+ return this.data;
+ }
+
+ /// <summary> Returns the offset in the underlying byte array </summary>
+ public virtual int GetOffset()
+ {
+ return this.offset;
+ }
+
+ /// <summary> Returns the length of the payload data. </summary>
+ public virtual int Length()
+ {
+ return this.length;
+ }
+
+ /// <summary> Returns the byte at the given index.</summary>
+ public virtual byte ByteAt(int index)
+ {
+ if (0 <= index && index < this.length)
+ {
+ return this.data[this.offset + index];
+ }
+ throw new System.IndexOutOfRangeException("Index out of bounds: " + index);
+ }
+
+ /// <summary> Allocates a new byte array, copies the payload data into it and returns it. </summary>
+ public virtual byte[] ToByteArray()
+ {
+ byte[] retArray = new byte[this.length];
+ Array.Copy(this.data, this.offset, retArray, 0, this.length);
+ return retArray;
+ }
+
+ /// <summary> Copies the payload data to a byte array.
+ ///
+ /// </summary>
+ /// <param name="target">the target byte array
+ /// </param>
+ /// <param name="targetOffset">the offset in the target byte array
+ /// </param>
+ public virtual void CopyTo(byte[] target, int targetOffset)
+ {
+ if (targetOffset + this.length > target.Length)
+ {
+ throw new System.IndexOutOfRangeException();
+ }
+ Array.Copy(this.data, this.offset, target, targetOffset, this.length);
+ }
+
+ /// <summary> Clones this payload by creating a copy of the underlying
+ /// byte array.
+ /// </summary>
+ public virtual System.Object Clone()
+ {
+ Payload clone = new Payload(this.ToByteArray());
+ return clone;
+ }
+ }
+}
\ No newline at end of file
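
A minimal read-side sketch using the TermPositions payload methods that this
commit wires up elsewhere in the namespace. The index path, field and term
are hypothetical, and the index is assumed to contain payloads attached to
Tokens at analysis time:

    // assumes: using Lucene.Net.Index;
    IndexReader reader = IndexReader.Open("payload-index");
    TermPositions tp = reader.TermPositions(new Term("body", "lucene"));
    while (tp.Next())
    {
        for (int j = 0; j < tp.Freq(); j++)
        {
            tp.NextPosition();  // advance before asking for the payload
            if (tp.IsPayloadAvailable())
            {
                byte[] data = tp.GetPayload(new byte[tp.GetPayloadLength()], 0);
                // e.g. wrap it: byte first = new Payload(data).ByteAt(0);
            }
        }
    }
    tp.Close();
    reader.Close();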
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> For each field, stores position-by-position information. It ignores frequency information.
+ /// <p/>
+ /// This is not thread-safe.
+ /// </summary>
+ public class PositionBasedTermVectorMapper : TermVectorMapper
+ {
+ private System.Collections.IDictionary fieldToTerms;
+
+ private System.String currentField;
+ /// <summary> A map from position (System.Int32) to TVPositionInfo</summary>
+ private System.Collections.IDictionary currentPositions;
+ private bool storeOffsets;
+
+ /// <summary> Constructs a mapper that ignores neither positions nor offsets.</summary>
+ public PositionBasedTermVectorMapper():base(false, false)
+ {
+ }
+
+ public PositionBasedTermVectorMapper(bool ignoringOffsets):base(false, ignoringOffsets)
+ {
+ }
+
+ /// <summary> Never ignores positions. This mapper doesn't make much sense unless there are positions.</summary>
+ /// <returns> false
+ /// </returns>
+ public override bool IsIgnoringPositions()
+ {
+ return false;
+ }
+
+ /// <summary> Callback for the TermVectorReader. </summary>
+ /// <param name="term">
+ /// </param>
+ /// <param name="frequency">
+ /// </param>
+ /// <param name="offsets">
+ /// </param>
+ /// <param name="positions">
+ /// </param>
+ public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ for (int i = 0; i < positions.Length; i++)
+ {
+ System.Int32 posVal = (System.Int32) positions[i];
+ TVPositionInfo pos = (TVPositionInfo) currentPositions[posVal];
+ if (pos == null)
+ {
+ pos = new TVPositionInfo(positions[i], storeOffsets);
+ currentPositions[posVal] = pos;
+ }
+ pos.AddTerm(term, offsets != null ? offsets[i] : null);
+ }
+ }
+
+ /// <summary> Callback mechanism used by the TermVectorReader</summary>
+ /// <param name="field"> The field being read
+ /// </param>
+ /// <param name="numTerms">The number of terms in the vector
+ /// </param>
+ /// <param name="storeOffsets">Whether offsets are available
+ /// </param>
+ /// <param name="storePositions">Whether positions are available
+ /// </param>
+ public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+ {
+ if (storePositions == false)
+ {
+ throw new System.SystemException("You must store positions in order to use this Mapper");
+ }
+ if (storeOffsets == true)
+ {
+ //ignoring offsets
+ }
+ fieldToTerms = new System.Collections.Hashtable(numTerms);
+ this.storeOffsets = storeOffsets;
+ currentField = field;
+ currentPositions = new System.Collections.Hashtable();
+ fieldToTerms[currentField] = currentPositions;
+ }
+
+ /// <summary> Get the mapping between fields and terms, sorted by the comparator
+ ///
+ /// </summary>
+ /// <returns> A map between field names and a sub-Map. The sub-Map key is the position as an Integer; the value is {@link Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo}.
+ /// </returns>
+ public virtual System.Collections.IDictionary GetFieldToTerms()
+ {
+ return fieldToTerms;
+ }
+
+ /// <summary> Container for a term at a position</summary>
+ public class TVPositionInfo
+ {
+ private int position;
+ //a list of Strings
+ private System.Collections.IList terms;
+ //A list of TermVectorOffsetInfo
+ private System.Collections.IList offsets;
+
+
+ public TVPositionInfo(int position, bool storeOffsets)
+ {
+ this.position = position;
+ terms = new System.Collections.ArrayList();
+ if (storeOffsets)
+ {
+ offsets = new System.Collections.ArrayList();
+ }
+ }
+
+ internal virtual void AddTerm(System.String term, TermVectorOffsetInfo info)
+ {
+ terms.Add(term);
+ if (offsets != null)
+ {
+ offsets.Add(info);
+ }
+ }
+
+ /// <summary> </summary>
+ /// <returns> The position of the term
+ /// </returns>
+ public virtual int GetPosition()
+ {
+ return position;
+ }
+
+ /// <summary> Note, there may be multiple terms at the same position</summary>
+ /// <returns> A List of Strings
+ /// </returns>
+ public virtual System.Collections.IList GetTerms()
+ {
+ return terms;
+ }
+
+ /// <summary> Parallel list (to {@link #GetTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position</summary>
+ /// <returns> A List of TermVectorOffsetInfo objects, if offsets are stored.
+ /// </returns>
+ public virtual System.Collections.IList GetOffsets()
+ {
+ return offsets;
+ }
+ }
+ }
+}
\ No newline at end of file
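
A minimal usage sketch, assuming a hypothetical index whose "body" field was
indexed with term vectors including positions (SetExpectations above throws
if positions were not stored):

    // assumes: using Lucene.Net.Index;
    IndexReader reader = IndexReader.Open("vector-index");
    PositionBasedTermVectorMapper mapper = new PositionBasedTermVectorMapper();
    reader.GetTermFreqVector(0, "body", mapper);  // doc 0

    System.Collections.IDictionary byPosition =
        (System.Collections.IDictionary) mapper.GetFieldToTerms()["body"];
    foreach (System.Collections.DictionaryEntry e in byPosition)
    {
        PositionBasedTermVectorMapper.TVPositionInfo info =
            (PositionBasedTermVectorMapper.TVPositionInfo) e.Value;
        // info.GetPosition(), info.GetTerms() and, if stored, info.GetOffsets()
    }
    reader.Close();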
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentInfo.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
*/
using System;
+
using Directory = Lucene.Net.Store.Directory;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using IndexInput = Lucene.Net.Store.IndexInput;
@@ -25,6 +26,12 @@
sealed public class SegmentInfo : System.ICloneable
{
+
+ internal const int NO = - 1; // e.g. no norms; no deletes;
+ internal const int YES = 1; // e.g. have norms; have deletes;
+ internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
+ internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it.
+
public System.String name; // unique name in dir
public int docCount; // number of docs in seg
public Directory dir; // where segment resides
@@ -32,17 +39,21 @@
private bool preLockless; // true if this is a segments file written before
// lock-less commits (2.1)
- private long delGen; // current generation of del file; -1 if there
- // are no deletes; 0 if it's a pre-2.1 segment
- // (and we must check filesystem); 1 or higher if
+ private long delGen; // current generation of del file; NO if there
+ // are no deletes; CHECK_DIR if it's a pre-2.1 segment
+ // (and we must check filesystem); YES or higher if
// there are deletes at generation N
- private long[] normGen; // current generations of each field's norm file.
- // If this array is null, we must check filesystem
- // when preLockLess is true. Else,
- // there are no separate norms
+ private long[] normGen; // current generation of each field's norm file.
+ // If this array is null, for lockLess this means no
+ // separate norms. For preLockLess this means we must
+ // check filesystem. If this array is not null, its
+ // values mean: NO says this field has no separate
+ // norms; CHECK_DIR says it is a preLockLess segment and
+ // filesystem must be checked; >= YES says this field
+ // has separate norms with the specified generation
- private sbyte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's
+ private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
// pre-2.1 (ie, must check file system to see
// if <name>.cfs and <name>.nrm exist)
@@ -52,32 +63,57 @@
// and true for newly created merged segments (both
// compound and non compound).
+ private System.Collections.IList files; // cached list of files that this segment uses
+ // in the Directory
+
+ internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand)
+
+ private int docStoreOffset; // if this segment shares stored fields & vectors, this
+ // offset is where in that file this segment's docs begin
+ private System.String docStoreSegment; // name used to derive fields/vectors file we share with
+ // other segments
+ private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
+
public SegmentInfo(System.String name, int docCount, Directory dir)
{
this.name = name;
this.docCount = docCount;
this.dir = dir;
- delGen = - 1;
- isCompoundFile = 0;
+ delGen = NO;
+ isCompoundFile = (sbyte) (CHECK_DIR);
preLockless = true;
hasSingleNormFile = false;
+ docStoreOffset = - 1;
+ docStoreSegment = name;
+ docStoreIsCompoundFile = false;
+ }
+
+ public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile) : this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false)
+ {
}
- public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile) : this(name, docCount, dir)
+ public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile) : this(name, docCount, dir)
{
- this.isCompoundFile = (sbyte) (isCompoundFile ? 1 : - 1);
+ this.isCompoundFile = (sbyte) (isCompoundFile ? YES : NO);
this.hasSingleNormFile = hasSingleNormFile;
preLockless = false;
+ this.docStoreOffset = docStoreOffset;
+ this.docStoreSegment = docStoreSegment;
+ this.docStoreIsCompoundFile = docStoreIsCompoundFile;
+ System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null);
}
/// <summary> Copy everything from src SegmentInfo into our instance.</summary>
internal void Reset(SegmentInfo src)
{
+ ClearFiles();
name = src.name;
docCount = src.docCount;
dir = src.dir;
preLockless = src.preLockless;
delGen = src.delGen;
+ docStoreOffset = src.docStoreOffset;
+ docStoreIsCompoundFile = src.docStoreIsCompoundFile;
if (src.normGen == null)
{
normGen = null;
@@ -101,7 +137,7 @@
/// </param>
/// <param name="input">input handle to read segment info from
/// </param>
- public SegmentInfo(Directory dir, int format, IndexInput input)
+ internal SegmentInfo(Directory dir, int format, IndexInput input)
{
this.dir = dir;
name = input.ReadString();
@@ -109,6 +145,26 @@
if (format <= SegmentInfos.FORMAT_LOCKLESS)
{
delGen = input.ReadLong();
+ if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
+ {
+ docStoreOffset = input.ReadInt();
+ if (docStoreOffset != - 1)
+ {
+ docStoreSegment = input.ReadString();
+ docStoreIsCompoundFile = (1 == input.ReadByte());
+ }
+ else
+ {
+ docStoreSegment = name;
+ docStoreIsCompoundFile = false;
+ }
+ }
+ else
+ {
+ docStoreOffset = - 1;
+ docStoreSegment = name;
+ docStoreIsCompoundFile = false;
+ }
if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
{
hasSingleNormFile = (1 == input.ReadByte());
@@ -118,7 +174,7 @@
hasSingleNormFile = false;
}
int numNormGen = input.ReadInt();
- if (numNormGen == - 1)
+ if (numNormGen == NO)
{
normGen = null;
}
@@ -131,15 +187,18 @@
}
}
isCompoundFile = (sbyte) input.ReadByte();
- preLockless = isCompoundFile == 0;
+ preLockless = (isCompoundFile == CHECK_DIR);
}
else
{
- delGen = 0;
+ delGen = CHECK_DIR;
normGen = null;
- isCompoundFile = 0;
+ isCompoundFile = (sbyte) (CHECK_DIR);
preLockless = true;
hasSingleNormFile = false;
+ docStoreOffset = - 1;
+ docStoreIsCompoundFile = false;
+ docStoreSegment = null;
}
}
@@ -152,39 +211,66 @@
// norms set against it yet:
normGen = new long[numFields];
- if (!preLockless)
+ if (preLockless)
+ {
+ // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
+ // we have to check filesystem for norm files, because this is prelockless.
+ }
+ else
{
// This is a FORMAT_LOCKLESS segment, which means
- // there are no norms:
+ // there are no separate norms:
for (int i = 0; i < numFields; i++)
{
- normGen[i] = - 1;
+ normGen[i] = NO;
}
}
}
}
+ /// <summary>Returns total size in bytes of all of files used by
+ /// this segment.
+ /// </summary>
+ internal long SizeInBytes()
+ {
+ if (sizeInBytes == - 1)
+ {
+ System.Collections.IList files = Files();
+ int size = files.Count;
+ sizeInBytes = 0;
+ for (int i = 0; i < size; i++)
+ {
+ System.String fileName = (System.String) files[i];
+ // We don't count bytes used by a shared doc store
+ // against this segment:
+ if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName))
+ sizeInBytes += dir.FileLength(fileName);
+ }
+ }
+ return sizeInBytes;
+ }
+
internal bool HasDeletions()
{
// Cases:
//
- // delGen == -1: this means this segment was written
+ // delGen == NO: this means this segment was written
// by the LOCKLESS code and for certain does not have
// deletions yet
//
- // delGen == 0: this means this segment was written by
+ // delGen == CHECK_DIR: this means this segment was written by
// pre-LOCKLESS code which means we must check
// directory to see if .del file exists
//
- // delGen > 0: this means this segment was written by
+ // delGen >= YES: this means this segment was written by
// the LOCKLESS code and for certain has
// deletions
//
- if (delGen == - 1)
+ if (delGen == NO)
{
return false;
}
- else if (delGen > 0)
+ else if (delGen >= YES)
{
return true;
}
@@ -197,19 +283,21 @@
internal void AdvanceDelGen()
{
// delGen 0 is reserved for pre-LOCKLESS format
- if (delGen == - 1)
+ if (delGen == NO)
{
- delGen = 1;
+ delGen = YES;
}
else
{
delGen++;
}
+ ClearFiles();
}
internal void ClearDelGen()
{
- delGen = - 1;
+ delGen = NO;
+ ClearFiles();
}
public System.Object Clone()
@@ -224,12 +312,15 @@
si.normGen = new long[normGen.Length];
normGen.CopyTo(si.normGen, 0);
}
+ si.docStoreOffset = docStoreOffset;
+ si.docStoreSegment = docStoreSegment;
+ si.docStoreIsCompoundFile = docStoreIsCompoundFile;
return si;
}
internal System.String GetDelFileName()
{
- if (delGen == - 1)
+ if (delGen == NO)
{
// In this case we know there is no deletion filename
// against this segment
@@ -237,8 +328,8 @@
}
else
{
- // If delGen is 0, it's the pre-lockless-commit file format
- return IndexFileNames.FileNameFromGeneration(name, ".del", delGen);
+ // If delGen is CHECK_DIR, it's the pre-lockless-commit file format
+ return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@@ -249,13 +340,13 @@
/// </param>
internal bool HasSeparateNorms(int fieldNumber)
{
- if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0))
+ if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR))
{
// Must fallback to directory file exists check:
System.String fileName = name + ".s" + fieldNumber;
return dir.FileExists(fileName);
}
- else if (normGen == null || normGen[fieldNumber] == - 1)
+ else if (normGen == null || normGen[fieldNumber] == NO)
{
return false;
}
@@ -282,6 +373,11 @@
// code. So we must fallback to the original
// directory list check:
System.String[] result = dir.List();
+ if (result == null)
+ {
+ throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
+ }
+
System.String pattern;
pattern = name + ".s";
int patternLength = pattern.Length;
@@ -296,11 +392,11 @@
else
{
// This means this segment was saved with LOCKLESS
- // code so we first check whether any normGen's are >
- // 0 (meaning they definitely have separate norms):
+ // code so we first check whether any normGen's are >= 1
+ // (meaning they definitely have separate norms):
for (int i = 0; i < normGen.Length; i++)
{
- if (normGen[i] > 0)
+ if (normGen[i] >= YES)
{
return true;
}
@@ -309,7 +405,7 @@
// pre-LOCKLESS and must be checked in directory:
for (int i = 0; i < normGen.Length; i++)
{
- if (normGen[i] == 0)
+ if (normGen[i] == CHECK_DIR)
{
if (HasSeparateNorms(i))
{
@@ -330,14 +426,15 @@
/// </param>
internal void AdvanceNormGen(int fieldIndex)
{
- if (normGen[fieldIndex] == - 1)
+ if (normGen[fieldIndex] == NO)
{
- normGen[fieldIndex] = 1;
+ normGen[fieldIndex] = YES;
}
else
{
normGen[fieldIndex]++;
}
+ ClearFiles();
}
/// <summary> Get the file name for the norms file for this field.
@@ -352,7 +449,7 @@
long gen;
if (normGen == null)
{
- gen = 0;
+ gen = CHECK_DIR;
}
else
{
@@ -370,12 +467,12 @@
{
// case 2: lockless (or nrm file exists) - single file for all norms
prefix = "." + IndexFileNames.NORMS_EXTENSION;
- return IndexFileNames.FileNameFromGeneration(name, prefix, 0);
+ return IndexFileNames.FileNameFromGeneration(name, prefix, WITHOUT_GEN);
}
// case 3: norm file for each field
prefix = ".f";
- return IndexFileNames.FileNameFromGeneration(name, prefix + number, 0);
+ return IndexFileNames.FileNameFromGeneration(name, prefix + number, WITHOUT_GEN);
}
/// <summary> Mark whether this segment is stored as a compound file.
@@ -388,12 +485,13 @@
{
if (isCompoundFile)
{
- this.isCompoundFile = 1;
+ this.isCompoundFile = (sbyte) (YES);
}
else
{
- this.isCompoundFile = - 1;
+ this.isCompoundFile = (sbyte) (NO);
}
+ ClearFiles();
}
/// <summary> Returns true if this segment is stored as a compound
@@ -401,30 +499,64 @@
/// </summary>
internal bool GetUseCompoundFile()
{
- if (isCompoundFile == - 1)
+ if (isCompoundFile == NO)
{
return false;
}
- else if (isCompoundFile == 1)
+ else if (isCompoundFile == YES)
{
return true;
}
else
{
- return dir.FileExists(name + ".cfs");
+ return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
}
}
+ internal int GetDocStoreOffset()
+ {
+ return docStoreOffset;
+ }
+
+ internal bool GetDocStoreIsCompoundFile()
+ {
+ return docStoreIsCompoundFile;
+ }
+
+ internal void SetDocStoreIsCompoundFile(bool v)
+ {
+ docStoreIsCompoundFile = v;
+ ClearFiles();
+ }
+
+ internal System.String GetDocStoreSegment()
+ {
+ return docStoreSegment;
+ }
+
+ internal void SetDocStoreOffset(int offset)
+ {
+ docStoreOffset = offset;
+ ClearFiles();
+ }
+
/// <summary> Save this segment's info.</summary>
internal void Write(IndexOutput output)
{
output.WriteString(name);
output.WriteInt(docCount);
output.WriteLong(delGen);
+ output.WriteInt(docStoreOffset);
+ if (docStoreOffset != - 1)
+ {
+ output.WriteString(docStoreSegment);
+ output.WriteByte((byte) (docStoreIsCompoundFile ? 1 : 0));
+ }
+
output.WriteByte((byte) (hasSingleNormFile ? 1 : 0));
if (normGen == null)
{
- output.WriteInt(- 1);
+ output.WriteInt(NO);
}
else
{
@@ -436,5 +568,198 @@
}
output.WriteByte((byte) isCompoundFile);
}
+
+ private void AddIfExists(System.Collections.IList files, System.String fileName)
+ {
+ if (dir.FileExists(fileName))
+ files.Add(fileName);
+ }
+
+ /*
+ * Return all files referenced by this SegmentInfo. The
+ * returned List is a locally cached List so you should not
+ * modify it.
+ */
+
+ public System.Collections.IList Files()
+ {
+
+ if (files != null)
+ {
+ // Already cached:
+ return files;
+ }
+
+ files = new System.Collections.ArrayList();
+
+ bool useCompoundFile = GetUseCompoundFile();
+
+ if (useCompoundFile)
+ {
+ files.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ }
+ else
+ {
+ System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
+ for (int i = 0; i < exts.Length; i++)
+ AddIfExists(files, name + "." + exts[i]);
+ }
+
+ if (docStoreOffset != - 1)
+ {
+ // We are sharing doc stores (stored fields, term
+ // vectors) with other segments
+ System.Diagnostics.Debug.Assert(docStoreSegment != null);
+ if (docStoreIsCompoundFile)
+ {
+ files.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
+ }
+ else
+ {
+ System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+ for (int i = 0; i < exts.Length; i++)
+ AddIfExists(files, docStoreSegment + "." + exts[i]);
+ }
+ }
+ else if (!useCompoundFile)
+ {
+ // We are not sharing, and, these files were not
+ // included in the compound file
+ System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+ for (int i = 0; i < exts.Length; i++)
+ AddIfExists(files, name + "." + exts[i]);
+ }
+
+ System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
+ if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
+ {
+ files.Add(delFileName);
+ }
+
+ // Careful logic for norms files
+ if (normGen != null)
+ {
+ for (int i = 0; i < normGen.Length; i++)
+ {
+ long gen = normGen[i];
+ if (gen >= YES)
+ {
+ // Definitely a separate norm file, with generation:
+ files.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
+ }
+ else if (NO == gen)
+ {
+ // No separate norms but maybe plain norms
+ // in the non compound file case:
+ if (!hasSingleNormFile && !useCompoundFile)
+ {
+ System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
+ if (dir.FileExists(fileName))
+ {
+ files.Add(fileName);
+ }
+ }
+ }
+ else if (CHECK_DIR == gen)
+ {
+ // Pre-2.1: we have to check file existence
+ System.String fileName = null;
+ if (useCompoundFile)
+ {
+ fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
+ }
+ else if (!hasSingleNormFile)
+ {
+ fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
+ }
+ if (fileName != null && dir.FileExists(fileName))
+ {
+ files.Add(fileName);
+ }
+ }
+ }
+ }
+ else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
+ {
+ // Pre-2.1: we have to scan the dir to find all
+ // matching _X.sN/_X.fN files for our segment:
+ System.String prefix;
+ if (useCompoundFile)
+ prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
+ else
+ prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
+ int prefixLength = prefix.Length;
+ System.String[] allFiles = dir.List();
+ if (allFiles == null)
+ {
+ throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
+ }
+ for (int i = 0; i < allFiles.Length; i++)
+ {
+ System.String fileName = allFiles[i];
+ if (fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix))
+ {
+ files.Add(fileName);
+ }
+ }
+ }
+ return files;
+ }
+
+ /* Called whenever any change is made that affects which
+ * files this segment has. */
+ private void ClearFiles()
+ {
+ files = null;
+ sizeInBytes = - 1;
+ }
+
+ /// <summary>Used for debugging </summary>
+ public System.String SegString(Directory dir)
+ {
+ System.String cfs;
+ try
+ {
+ if (GetUseCompoundFile())
+ cfs = "c";
+ else
+ cfs = "C";
+ }
+ catch (System.IO.IOException ioe)
+ {
+ cfs = "?";
+ }
+
+ System.String docStore;
+
+ if (docStoreOffset != - 1)
+ docStore = "->" + docStoreSegment;
+ else
+ docStore = "";
+
+ return name + ":" + cfs + (this.dir == dir ? "" : "x") + docCount + docStore;
+ }
+
+ /// <summary>We consider another SegmentInfo instance equal if it
+ /// has the same dir and same name.
+ /// </summary>
+ public override bool Equals(System.Object obj)
+ {
+ SegmentInfo other;
+ try
+ {
+ other = (SegmentInfo) obj;
+ }
+ catch (System.InvalidCastException cce)
+ {
+ return false;
+ }
+ return other.dir == dir && other.name.Equals(name);
+ }
+
+ public override int GetHashCode()
+ {
+ return dir.GetHashCode() + name.GetHashCode();
+ }
}
}
\ No newline at end of file
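
The NO/YES/CHECK_DIR constants introduced above encode a three-way answer
(definitely not, definitely yes, ask the filesystem) that recurs for deletes,
separate norms and compound files. A standalone C# paraphrase of the
deletions case, sketching the logic of HasDeletions() rather than any public
API:

    const int NO = -1;        // lockless segment: definitely no deletes
    const int YES = 1;        // lockless segment: deletes at generation >= 1
    const int CHECK_DIR = 0;  // pre-2.1 segment: must probe for the .del file

    static bool HasDeletions(long delGen, bool delFileExistsInDir)
    {
        if (delGen == NO)
            return false;
        else if (delGen >= YES)
            return true;
        else
            return delFileExistsInDir;  // CHECK_DIR
    }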