You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2006/06/04 04:41:25 UTC
svn commit: r411501 [11/30] - in /incubator/lucene.net/trunk/C#/src: ./
Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/
Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/
Lucene.Net/Analysis/Standard/ Lucene.Net/Docu...
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs Sat Jun 3 19:41:13 2006
@@ -13,21 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
using BitVector = Lucene.Net.Util.BitVector;
+
namespace Lucene.Net.Index
{
- /// <summary> FIXME: Describe class <code>SegmentReader</code> here.
- ///
- /// </summary>
- /// <version> $Id: SegmentReader.java,v 1.23 2004/07/10 06:19:01 otis Exp $
+ /// <version> $Id: SegmentReader.java 329523 2005-10-30 05:37:11Z yonik $
/// </version>
- sealed public class SegmentReader : IndexReader
+ public class SegmentReader : IndexReader
{
private System.String segment;
@@ -35,19 +36,25 @@
private FieldsReader fieldsReader;
internal TermInfosReader tis;
- internal TermVectorsReader termVectorsReader;
+ internal TermVectorsReader termVectorsReaderOrig = null;
+ internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
internal BitVector deletedDocs = null;
private bool deletedDocsDirty = false;
private bool normsDirty = false;
private bool undeleteAll = false;
- internal InputStream freqStream;
- internal InputStream proxStream;
+ internal IndexInput freqStream;
+ internal IndexInput proxStream;
// Compound File Reader when based on a compound file segment
- internal CompoundFileReader cfsReader;
+ internal CompoundFileReader cfsReader = null;
+ public FieldInfos FieldInfos
+ {
+ get { return fieldInfos; }
+ }
+
private class Norm
{
private void InitBlock(SegmentReader enclosingInstance)
@@ -63,22 +70,22 @@
}
}
- public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
+ public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
{
InitBlock(enclosingInstance);
this.in_Renamed = in_Renamed;
this.number = number;
}
- public InputStream in_Renamed; // private -> public
- public byte[] bytes; // private -> public
- public bool dirty; // private -> public
- public int number; // private -> public
+ public IndexInput in_Renamed;
+ public byte[] bytes;
+ public bool dirty;
+ public int number;
- public void ReWrite() // private -> public
+ public void ReWrite()
{
// NOTE: norms are re-written in regular directory, not cfs
- OutputStream out_Renamed = Enclosing_Instance.Directory().CreateFile(Enclosing_Instance.segment + ".tmp");
+ IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(Enclosing_Instance.segment + ".tmp");
try
{
out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
@@ -87,7 +94,14 @@
{
out_Renamed.Close();
}
- System.String fileName = Enclosing_Instance.segment + ".f" + number;
+ System.String fileName;
+ if (Enclosing_Instance.cfsReader == null)
+ fileName = Enclosing_Instance.segment + ".f" + number;
+ else
+ {
+ // use a different file name if we have compound format
+ fileName = Enclosing_Instance.segment + ".s" + number;
+ }
Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
this.dirty = false;
}
@@ -95,14 +109,37 @@
private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
- public /*internal*/ SegmentReader(SegmentInfos sis, SegmentInfo si, bool closeDir) : base(si.dir, sis, closeDir)
+ /// <summary>The class which implements SegmentReader. </summary>
+ private static System.Type IMPL;
+
+ public SegmentReader() : base(null)
+ {
+ }
+
+ public static SegmentReader Get(SegmentInfo si)
+ {
+ return Get(si.dir, si, null, false, false);
+ }
+
+ public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
{
- Initialize(si);
+ return Get(si.dir, si, sis, closeDir, true);
}
- public /*internal*/ SegmentReader(SegmentInfo si) : base(si.dir)
+ public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
{
- Initialize(si);
+ SegmentReader instance;
+ try
+ {
+ instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("cannot load SegmentReader class: " + e);
+ }
+ instance.Init(dir, sis, closeDir, ownDir);
+ instance.Initialize(si);
+ return instance;
}
private void Initialize(SegmentInfo si)
@@ -129,22 +166,28 @@
// make sure that all index files have been read or are kept open
// so that if an index update removes them we'll still have them
- freqStream = cfsDir.OpenFile(segment + ".frq");
- proxStream = cfsDir.OpenFile(segment + ".prx");
+ freqStream = cfsDir.OpenInput(segment + ".frq");
+ proxStream = cfsDir.OpenInput(segment + ".prx");
OpenNorms(cfsDir);
if (fieldInfos.HasVectors())
{
// open term vector files only as needed
- termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
+ termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
}
}
+ ~SegmentReader()
+ {
+ // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+ System.Threading.Thread.SetData(termVectorsLocal, null);
+ }
+
protected internal override void DoCommit()
{
if (deletedDocsDirty)
{
- // re-write deleted
+ // re-write deleted
deletedDocs.Write(Directory(), segment + ".tmp");
Directory().RenameFile(segment + ".tmp", segment + ".del");
}
@@ -154,7 +197,7 @@
}
if (normsDirty)
{
- // re-write norms
+ // re-write norms
System.Collections.IEnumerator values = norms.Values.GetEnumerator();
while (values.MoveNext())
{
@@ -181,8 +224,9 @@
proxStream.Close();
CloseNorms();
- if (termVectorsReader != null)
- termVectorsReader.Close();
+
+ if (termVectorsReaderOrig != null)
+ termVectorsReaderOrig.Close();
if (cfsReader != null)
cfsReader.Close();
@@ -207,9 +251,9 @@
internal static bool HasSeparateNorms(SegmentInfo si)
{
System.String[] result = si.dir.List();
- System.String pattern = si.name + ".f";
+ System.String pattern = si.name + ".s";
int patternLength = pattern.Length;
- for (int i = 0; i < 0; i++)
+ for (int i = 0; i < result.Length; i++)
{
if (result[i].StartsWith(pattern) && System.Char.IsDigit(result[i][patternLength]))
return true;
@@ -233,14 +277,13 @@
undeleteAll = true;
}
- internal System.Collections.ArrayList Files()
+ internal virtual System.Collections.ArrayList Files()
{
System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
- System.String[] ext = new System.String[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"};
- for (int i = 0; i < ext.Length; i++)
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
{
- System.String name = segment + "." + ext[i];
+ System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
if (Directory().FileExists(name))
files.Add(name);
}
@@ -248,8 +291,16 @@
for (int i = 0; i < fieldInfos.Size(); i++)
{
FieldInfo fi = fieldInfos.FieldInfo(i);
- if (fi.isIndexed)
- files.Add(segment + ".f" + i);
+ if (fi.isIndexed && !fi.omitNorms)
+ {
+ System.String name;
+ if (cfsReader == null)
+ name = segment + ".f" + i;
+ else
+ name = segment + ".s" + i;
+ if (Directory().FileExists(name))
+ files.Add(name);
+ }
}
return files;
}
@@ -314,11 +365,13 @@
return fieldsReader.Size();
}
- /// <seealso cref="IndexReader#GetFieldNames()">
+ /// <seealso cref="IndexReader.GetFieldNames()">
/// </seealso>
+ /// <deprecated> Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
+ /// </deprecated>
public override System.Collections.ICollection GetFieldNames()
{
- // maintain a unique set of Field names
+ // maintain a unique set of field names
System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
for (int i = 0; i < fieldInfos.Size(); i++)
{
@@ -328,11 +381,13 @@
return fieldSet;
}
- /// <seealso cref="IndexReader#GetFieldNames(boolean)">
+ /// <seealso cref="IndexReader.GetFieldNames(boolean)">
/// </seealso>
+ /// <deprecated> Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
+ /// </deprecated>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
- // maintain a unique set of Field names
+ // maintain a unique set of field names
System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
for (int i = 0; i < fieldInfos.Size(); i++)
{
@@ -343,20 +398,57 @@
return fieldSet;
}
- /// <summary> </summary>
- /// <param name="storedTermVector">if true, returns only Indexed fields that have term vector info,
- /// else only indexed fields without term vector info
- /// </param>
- /// <returns> Collection of Strings indicating the names of the fields
- /// </returns>
- public override System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
+ /// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector tvSpec)">
+ /// </seealso>
+ /// <deprecated> Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
+ /// </deprecated>
+ public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
- // maintain a unique set of Field names
+ bool storedTermVector;
+ bool storePositionWithTermVector;
+ bool storeOffsetWithTermVector;
+
+ if (tvSpec == Field.TermVector.NO)
+ {
+ storedTermVector = false;
+ storePositionWithTermVector = false;
+ storeOffsetWithTermVector = false;
+ }
+ else if (tvSpec == Field.TermVector.YES)
+ {
+ storedTermVector = true;
+ storePositionWithTermVector = false;
+ storeOffsetWithTermVector = false;
+ }
+ else if (tvSpec == Field.TermVector.WITH_POSITIONS)
+ {
+ storedTermVector = true;
+ storePositionWithTermVector = true;
+ storeOffsetWithTermVector = false;
+ }
+ else if (tvSpec == Field.TermVector.WITH_OFFSETS)
+ {
+ storedTermVector = true;
+ storePositionWithTermVector = false;
+ storeOffsetWithTermVector = true;
+ }
+ else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
+ {
+ storedTermVector = true;
+ storePositionWithTermVector = true;
+ storeOffsetWithTermVector = true;
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
+ }
+
+ // maintain a unique set of field names
System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
- for (int ii = 0; ii < fieldInfos.Size(); ii++)
+ for (int i = 0; i < fieldInfos.Size(); i++)
{
- FieldInfo fi = fieldInfos.FieldInfo(ii);
- if (fi.isIndexed == true && fi.storeTermVector == storedTermVector)
+ FieldInfo fi = fieldInfos.FieldInfo(i);
+ if (fi.isIndexed && fi.storeTermVector == storedTermVector && fi.storePositionWithTermVector == storePositionWithTermVector && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
{
fieldSet.Add(fi.name, fi.name);
}
@@ -364,14 +456,93 @@
return fieldSet;
}
- public override byte[] Norms(System.String field)
+ /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption fldOption)">
+ /// </seealso>
+ public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
+ {
+ System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
+ for (int i = 0; i < fieldInfos.Size(); i++)
+ {
+ FieldInfo fi = fieldInfos.FieldInfo(i);
+ if (fieldOption == IndexReader.FieldOption.ALL)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
+ {
+ fieldSet.Add(fi.name, fi.name);
+ }
+ }
+ return fieldSet;
+ }
+
+
+ public override bool HasNorms(System.String field)
+ {
+ lock (this)
+ {
+ return norms.ContainsKey(field);
+ }
+ }
+
+ internal static byte[] CreateFakeNorms(int size)
+ {
+ byte[] ones = new byte[size];
+ byte[] byteArray = new byte[ones.Length];
+ for (int index = 0; index < ones.Length; index++)
+ byteArray[index] = (byte) ones[index];
+ byte val = DefaultSimilarity.EncodeNorm(1.0f);
+ for (int index = 0; index < byteArray.Length; index++)
+ byteArray.SetValue(val, index);
+
+ return ones;
+ }
+
+ private byte[] ones;
+ private byte[] FakeNorms()
+ {
+ if (ones == null)
+ ones = CreateFakeNorms(MaxDoc());
+ return ones;
+ }
+
+ // can return null if norms aren't stored
+ protected internal virtual byte[] GetNorms(System.String field)
{
lock (this)
{
Norm norm = (Norm) norms[field];
if (norm == null)
- // not an indexed Field
- return null;
+ return null; // not indexed, or norms not stored
+
if (norm.bytes == null)
{
// value not yet read
@@ -383,11 +554,23 @@
}
}
+ // returns fake norms if norms aren't available
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ byte[] bytes = GetNorms(field);
+ if (bytes == null)
+ bytes = FakeNorms();
+ return bytes;
+ }
+ }
+
protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed)
{
Norm norm = (Norm) norms[field];
if (norm == null)
- // not an indexed Field
+ // not an indexed field
return ;
norm.dirty = true; // mark it dirty
normsDirty = true;
@@ -403,7 +586,10 @@
Norm norm = (Norm) norms[field];
if (norm == null)
- return ; // use zeros in array
+ {
+ Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
+ return ;
+ }
if (norm.bytes != null)
{
@@ -412,7 +598,7 @@
return ;
}
- InputStream normStream = (InputStream) norm.in_Renamed.Clone();
+ IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
try
{
// read from disk
@@ -426,17 +612,23 @@
}
}
+
private void OpenNorms(Directory cfsDir)
{
for (int i = 0; i < fieldInfos.Size(); i++)
{
FieldInfo fi = fieldInfos.FieldInfo(i);
- if (fi.isIndexed)
+ if (fi.isIndexed && !fi.omitNorms)
{
- System.String fileName = segment + ".f" + fi.number;
- // look first for re-written file, then in compound format
- Directory d = Directory().FileExists(fileName)?Directory():cfsDir;
- norms[fi.name] = new Norm(this, d.OpenFile(fileName), fi.number);
+ // look first if there are separate norms in compound format
+ System.String fileName = segment + ".s" + fi.number;
+ Directory d = Directory();
+ if (!d.FileExists(fileName))
+ {
+ fileName = segment + ".f" + fi.number;
+ d = cfsDir;
+ }
+ norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number);
}
}
}
@@ -454,16 +646,35 @@
}
}
- /// <summary>Return a term frequency vector for the specified document and Field. The
+ /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
+ /// <returns> TermVectorsReader
+ /// </returns>
+ private TermVectorsReader GetTermVectorsReader()
+ {
+ TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal);
+ if (tvReader == null)
+ {
+ tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
+ System.Threading.Thread.SetData(termVectorsLocal, tvReader);
+ }
+ return tvReader;
+ }
+
+ /// <summary>Return a term frequency vector for the specified document and field. The
/// vector returned contains term numbers and frequencies for all terms in
- /// the specified Field of this document, if the Field had storeTermVector
+ /// the specified field of this document, if the field had storeTermVector
/// flag set. If the flag was not set, the method returns null.
/// </summary>
+ /// <throws> IOException </throws>
public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
{
- // Check if this Field is invalid or has no stored term vector
+ // Check if this field is invalid or has no stored term vector
FieldInfo fi = fieldInfos.FieldInfo(field);
- if (fi == null || !fi.storeTermVector)
+ if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
+ return null;
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
+ if (termVectorsReader == null)
return null;
return termVectorsReader.Get(docNumber, field);
@@ -471,17 +682,48 @@
/// <summary>Return an array of term frequency vectors for the specified document.
- /// The array contains a vector for each vectorized Field in the document.
+ /// The array contains a vector for each vectorized field in the document.
/// Each vector vector contains term numbers and frequencies for all terms
- /// in a given vectorized Field.
+ /// in a given vectorized field.
/// If no such fields existed, the method returns null.
/// </summary>
+ /// <throws> IOException </throws>
public override TermFreqVector[] GetTermFreqVectors(int docNumber)
{
+ if (termVectorsReaderOrig == null)
+ return null;
+
+ TermVectorsReader termVectorsReader = GetTermVectorsReader();
if (termVectorsReader == null)
return null;
return termVectorsReader.Get(docNumber);
+ }
+
+ static SegmentReader()
+ {
+ {
+ try
+ {
+ System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
+ IMPL = System.Type.GetType(name);
+ }
+ catch (System.Security.SecurityException)
+ {
+ try
+ {
+ IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("cannot load default SegmentReader class: " + e);
+ }
+ }
+ catch (System.Exception e)
+ {
+ throw new System.SystemException("cannot load SegmentReader class: " + e);
+ }
+ }
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermDocs.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs Sat Jun 3 19:41:13 2006
@@ -13,36 +13,38 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
using BitVector = Lucene.Net.Util.BitVector;
+
namespace Lucene.Net.Index
{
public class SegmentTermDocs : TermDocs
{
protected internal SegmentReader parent;
- private InputStream freqStream;
- private int count;
- private int df;
- private BitVector deletedDocs;
+ protected internal IndexInput freqStream;
+ protected internal int count;
+ protected internal int df;
+ protected internal BitVector deletedDocs;
internal int doc = 0;
internal int freq;
private int skipInterval;
private int numSkips;
private int skipCount;
- private InputStream skipStream;
+ private IndexInput skipStream;
private int skipDoc;
private long freqPointer;
private long proxPointer;
private long skipPointer;
private bool haveSkipped;
- public /*internal*/ SegmentTermDocs(SegmentReader parent)
+ public SegmentTermDocs(SegmentReader parent)
{
this.parent = parent;
- this.freqStream = (InputStream) parent.freqStream.Clone();
+ this.freqStream = (IndexInput) parent.freqStream.Clone();
this.deletedDocs = parent.deletedDocs;
this.skipInterval = parent.tis.GetSkipInterval();
}
@@ -177,7 +179,7 @@
// optimized case
if (skipStream == null)
- skipStream = (InputStream) freqStream.Clone(); // lazily clone
+ skipStream = (IndexInput) freqStream.Clone(); // lazily clone
if (!haveSkipped)
{
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermEnum.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs Sat Jun 3 19:41:13 2006
@@ -13,19 +13,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
namespace Lucene.Net.Index
{
- sealed public class SegmentTermEnum:TermEnum, System.ICloneable
+ public sealed class SegmentTermEnum : TermEnum, System.ICloneable
{
- private InputStream input;
+ private IndexInput input;
internal FieldInfos fieldInfos;
internal long size;
internal long position = - 1;
- private Term term = new Term("", "");
+ private TermBuffer termBuffer = new TermBuffer();
+ private TermBuffer prevBuffer = new TermBuffer();
+ private TermBuffer scratch; // used for scanning
+
private TermInfo termInfo = new TermInfo();
private int format;
@@ -34,11 +39,8 @@
internal int indexInterval;
internal int skipInterval;
private int formatM1SkipInterval;
- internal Term prev;
-
- private char[] buffer = new char[]{};
- internal SegmentTermEnum(InputStream i, FieldInfos fis, bool isi)
+ internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
{
input = i;
fieldInfos = fis;
@@ -96,10 +98,12 @@
{
}
- clone.input = (InputStream) input.Clone();
+ clone.input = (IndexInput) input.Clone();
clone.termInfo = new TermInfo(termInfo);
- if (term != null)
- clone.GrowBuffer(term.text.Length);
+
+ clone.termBuffer = (TermBuffer) termBuffer.Clone();
+ clone.prevBuffer = (TermBuffer) prevBuffer.Clone();
+ clone.scratch = null;
return clone;
}
@@ -108,10 +112,9 @@
{
input.Seek(pointer);
position = p;
- term = t;
- prev = null;
+ termBuffer.Set(t);
+ prevBuffer.Reset();
termInfo.Set(ti);
- GrowBuffer(term.text.Length); // copy term text into buffer
}
/// <summary>Increments the enumeration to the next element. True if one exists.</summary>
@@ -119,12 +122,12 @@
{
if (position++ >= size - 1)
{
- term = null;
+ termBuffer.Reset();
return false;
}
- prev = term;
- term = ReadTerm();
+ prevBuffer.Set(termBuffer);
+ termBuffer.Read(input, fieldInfos);
termInfo.docFreq = input.ReadVInt(); // read doc freq
termInfo.freqPointer += input.ReadVLong(); // read freq pointer
@@ -154,24 +157,15 @@
return true;
}
- private Term ReadTerm()
- {
- int start = input.ReadVInt();
- int length = input.ReadVInt();
- int totalLength = start + length;
- if (buffer.Length < totalLength)
- GrowBuffer(totalLength);
-
- input.ReadChars(buffer, start, length);
- return new Term(fieldInfos.FieldName(input.ReadVInt()), new System.String(buffer, 0, totalLength), false);
- }
-
- private void GrowBuffer(int length)
+ /// <summary>Optimized scan, without allocating new terms. </summary>
+ internal void ScanTo(Term term)
{
- buffer = new char[length];
- for (int i = 0; i < term.text.Length; i++)
- // copy contents
- buffer[i] = term.text[i];
+ if (scratch == null)
+ scratch = new TermBuffer();
+ scratch.Set(term);
+ while (scratch.CompareTo(termBuffer) > 0 && Next())
+ {
+ }
}
/// <summary>Returns the current Term in the enumeration.
@@ -179,13 +173,19 @@
/// </summary>
public override Term Term()
{
- return term;
+ return termBuffer.ToTerm();
+ }
+
+ /// <summary>Returns the previous Term enumerated. Initially null.</summary>
+ internal Term Prev()
+ {
+ return prevBuffer.ToTerm();
}
/// <summary>Returns the current TermInfo in the enumeration.
/// Initially invalid, valid after next() called for the first time.
/// </summary>
- public /*internal*/ TermInfo TermInfo()
+ internal TermInfo TermInfo()
{
return new TermInfo(termInfo);
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermPositionVector.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ public class SegmentTermPositionVector : SegmentTermVector, TermPositionVector
+ {
+ protected internal int[][] positions;
+ protected internal TermVectorOffsetInfo[][] offsets;
+ public static readonly int[] EMPTY_TERM_POS = new int[0];
+
+ public SegmentTermPositionVector(System.String field, System.String[] terms, int[] termFreqs, int[][] positions, TermVectorOffsetInfo[][] offsets):base(field, terms, termFreqs)
+ {
+ this.offsets = offsets;
+ this.positions = positions;
+ }
+
+ /// <summary> Returns an array of TermVectorOffsetInfo in which the term is found.
+ ///
+ /// </summary>
+ /// <param name="index">The position in the array to get the offsets from
+ /// </param>
+ /// <returns> An array of TermVectorOffsetInfo objects or the empty list
+ /// </returns>
+ /// <seealso cref="Lucene.Net.analysis.Token">
+ /// </seealso>
+ public virtual TermVectorOffsetInfo[] GetOffsets(int index)
+ {
+ TermVectorOffsetInfo[] result = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
+ if (offsets == null)
+ return null;
+ if (index >= 0 && index < offsets.Length)
+ {
+ result = offsets[index];
+ }
+ return result;
+ }
+
+ /// <summary> Returns an array of positions in which the term is found.
+ /// Terms are identified by the index at which its number appears in the
+ /// term String array obtained from the <code>indexOf</code> method.
+ /// </summary>
+ public virtual int[] GetTermPositions(int index)
+ {
+ int[] result = EMPTY_TERM_POS;
+ if (positions == null)
+ return null;
+ if (index >= 0 && index < positions.Length)
+ {
+ result = positions[index];
+ }
+
+ return result;
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermPositions.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs Sat Jun 3 19:41:13 2006
@@ -13,20 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
namespace Lucene.Net.Index
{
sealed class SegmentTermPositions : SegmentTermDocs, TermPositions
{
- private InputStream proxStream;
+ private IndexInput proxStream;
private int proxCount;
private int position;
- internal SegmentTermPositions(SegmentReader p):base(p)
+ internal SegmentTermPositions(SegmentReader p) : base(p)
{
- this.proxStream = (InputStream) parent.proxStream.Clone();
+ this.proxStream = (IndexInput) parent.proxStream.Clone();
}
internal override void Seek(TermInfo ti)
@@ -78,7 +80,7 @@
}
- /// <summary>Called by base.SkipTo(). </summary>
+ /// <summary>Called by super.skipTo(). </summary>
protected internal override void SkipProx(long proxPointer)
{
proxStream.Seek(proxPointer);
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermVector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs Sat Jun 3 19:41:13 2006
@@ -13,12 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
- class SegmentTermVector : TermFreqVector
+ public class SegmentTermVector : TermFreqVector
{
private System.String field;
private System.String[] terms;
@@ -32,7 +34,7 @@
}
/// <summary> </summary>
- /// <returns> The number of the Field this vector is associated with
+ /// <returns> The number of the field this vector is associated with
/// </returns>
public virtual System.String GetField()
{
@@ -44,13 +46,17 @@
System.Text.StringBuilder sb = new System.Text.StringBuilder();
sb.Append('{');
sb.Append(field).Append(": ");
- for (int i = 0; i < terms.Length; i++)
+ if (terms != null)
{
- if (i > 0)
- sb.Append(", ");
- sb.Append(terms[i]).Append('/').Append(termFreqs[i]);
+ for (int i = 0; i < terms.Length; i++)
+ {
+ if (i > 0)
+ sb.Append(", ");
+ sb.Append(terms[i]).Append('/').Append(termFreqs[i]);
+ }
}
sb.Append('}');
+
return sb.ToString();
}
@@ -71,6 +77,8 @@
public virtual int IndexOf(System.String termText)
{
+ if (terms == null)
+ return - 1;
int res = System.Array.BinarySearch(terms, termText);
return res >= 0?res:- 1;
}
@@ -86,7 +94,7 @@
for (int i = 0; i < len; i++)
{
- res[i] = IndexOf(termNumbers[i]);
+ res[i] = IndexOf(termNumbers[start + i]);
}
return res;
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Term.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs Sat Jun 3 19:41:13 2006
@@ -13,32 +13,36 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
- /// <summary>A Term represents a word from text. This is the unit of search. It is
- /// composed of two elements, the text of the word, as a string, and the name of
- /// the Field that the text occured in, an interned string.
- /// Note that terms may represent more than words from text fields, but also
- /// things like dates, email addresses, urls, etc.
- /// </summary>
- [Serializable]
+
+ /// <summary>A Term represents a word from text. This is the unit of search. It is
+ /// composed of two elements, the text of the word, as a string, and the name of
+ /// the field that the text occured in, an interned string.
+ /// Note that terms may represent more than words from text fields, but also
+ /// things like dates, email addresses, urls, etc.
+ /// </summary>
+
+ [Serializable]
public sealed class Term : System.IComparable
{
internal System.String field;
public /*internal*/ System.String text;
- /// <summary>Constructs a Term with the given Field and text. </summary>
+ /// <summary>Constructs a Term with the given field and text. </summary>
public Term(System.String fld, System.String txt) : this(fld, txt, true)
{
}
internal Term(System.String fld, System.String txt, bool intern)
{
- field = intern ? String.Intern(fld) : fld; // Field names are interned
+ field = intern ? String.Intern(fld) : fld; // field names are interned
text = txt; // unless already known to be
}
- /// <summary>Returns the Field of this term, an interned string. The Field indicates
+ /// <summary>Returns the field of this term, an interned string. The field indicates
/// the part of a document which this term came from.
/// </summary>
public System.String Field()
@@ -55,10 +59,22 @@
return text;
}
+ /// <summary> Optimized construction of new Terms by reusing same field as this Term
+ /// - avoids field.intern() overhead
+ /// </summary>
+ /// <param name="text">The text of the new term (field is implicitly same as this Term instance)
+ /// </param>
+ /// <returns> A new Term
+ /// </returns>
+ public Term CreateTerm(System.String text)
+ {
+ return new Term(field, text, false);
+ }
+
/// <summary>Compares two terms, returning true iff they have the same
- /// Field and text.
+ /// field and text.
/// </summary>
- public override bool Equals(System.Object o)
+ public override bool Equals(System.Object o)
{
if (o == null)
return false;
@@ -66,7 +82,7 @@
return field == other.field && text.Equals(other.text);
}
- /// <summary>Combines the hashCode() of the Field and the text. </summary>
+ /// <summary>Combines the hashCode() of the field and the text. </summary>
public override int GetHashCode()
{
return field.GetHashCode() + text.GetHashCode();
@@ -77,10 +93,10 @@
return CompareTo((Term) other);
}
- /// <summary>Compares two terms, returning an integer which is less than zero iff this
- /// term belongs after the argument, equal zero iff this term is equal to the
- /// argument, and greater than zero iff this term belongs after the argument.
- /// The ordering of terms is first by Field, then by text.
+ /// <summary>Compares two terms, returning a negative integer if this
+ /// term belongs before the argument, zero if this term is equal to the
+ /// argument, and a positive integer if this term belongs after the argument.
+ /// The ordering of terms is first by field, then by text.
/// </summary>
public int CompareTo(Term other)
{
@@ -91,7 +107,7 @@
return String.CompareOrdinal(field, other.field);
}
- /// <summary>Resets the Field and text of a Term. </summary>
+ /// <summary>Resets the field and text of a Term. </summary>
internal void Set(System.String fld, System.String txt)
{
field = fld;
@@ -105,12 +121,13 @@
private void ReadObject(System.IO.BinaryReader in_Renamed)
{
- // This function is private and is never been called, so this may not be a port issue.
- // in_Renamed.defaultReadObject(); >> 'java.io.ObjectInputStream.defaultReadObject()' // {{Aroush-1.4.3}}
+ // This function is private and is never been called, so this may not be a port issue. // {{Aroush-1.4.3}}
+ // 'java.io.ObjectInputStream.defaultReadObject' was not converted // {{Aroush-1.4.3}}
+ // in_Renamed.defaultReadObject(); // {{Aroush-1.4.3}}
field = String.Intern(field);
}
- // {{Aroush-1.4.3: Or is this what we want (vs. the above)?!!
+ // {{Aroush-1.4.3: or is this method is what we want (vs. the above)?!!
private void GetObjectData(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
{
info.AddValue("field", field);
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermBuffer.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class TermBuffer : System.ICloneable
+ {
+ private static readonly char[] NO_CHARS = new char[0];
+
+ private System.String field;
+ private char[] text = NO_CHARS;
+ private int textLength;
+ private Term term; // cached
+
+ public int CompareTo(TermBuffer other)
+ {
+ if ((System.Object) field == (System.Object) other.field)
+ // fields are interned
+ return CompareChars(text, textLength, other.text, other.textLength);
+ else
+ return String.CompareOrdinal(field, other.field);
+ }
+
+ private static int CompareChars(char[] v1, int len1, char[] v2, int len2)
+ {
+ int end = System.Math.Min(len1, len2);
+ for (int k = 0; k < end; k++)
+ {
+ char c1 = v1[k];
+ char c2 = v2[k];
+ if (c1 != c2)
+ {
+ return c1 - c2;
+ }
+ }
+ return len1 - len2;
+ }
+
+ private void SetTextLength(int newLength)
+ {
+ if (text.Length < newLength)
+ {
+ char[] newText = new char[newLength];
+ Array.Copy(text, 0, newText, 0, textLength);
+ text = newText;
+ }
+ textLength = newLength;
+ }
+
+ public void Read(IndexInput input, FieldInfos fieldInfos)
+ {
+ this.term = null; // invalidate cache
+ int start = input.ReadVInt();
+ int length = input.ReadVInt();
+ int totalLength = start + length;
+ SetTextLength(totalLength);
+ input.ReadChars(this.text, start, length);
+ this.field = fieldInfos.FieldName(input.ReadVInt());
+ }
+
+ public void Set(Term term)
+ {
+ if (term == null)
+ {
+ Reset();
+ return ;
+ }
+
+ // copy text into the buffer
+ SetTextLength(term.Text().Length);
+
+ System.String sourceString = term.Text();
+ int sourceEnd = term.Text().Length;
+ for (int i = 0; i < sourceEnd; i++)
+ {
+ text[i] = (char) sourceString[i];
+ }
+
+ this.field = term.Field();
+ this.term = term;
+ }
+
+ public void Set(TermBuffer other)
+ {
+ SetTextLength(other.textLength);
+ Array.Copy(other.text, 0, text, 0, textLength);
+
+ this.field = other.field;
+ this.term = other.term;
+ }
+
+ public void Reset()
+ {
+ this.field = null;
+ this.textLength = 0;
+ this.term = null;
+ }
+
+ public Term ToTerm()
+ {
+ if (field == null)
+ // unset
+ return null;
+
+ if (term == null)
+ term = new Term(field, new System.String(text, 0, textLength), false);
+
+ return term;
+ }
+
+ public System.Object Clone()
+ {
+ TermBuffer clone = null;
+ try
+ {
+ clone = (TermBuffer) base.MemberwiseClone();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ clone.text = new char[text.Length];
+ Array.Copy(text, 0, clone.text, 0, textLength);
+
+ return clone;
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermDocs.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs Sat Jun 3 19:41:13 2006
@@ -13,7 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
@@ -23,7 +25,7 @@
/// the number of times the term occurred in each document. <p> The pairs are
/// ordered by document number.
/// </summary>
- /// <seealso cref="IndexReader#termDocs">
+ /// <seealso cref="IndexReader.TermDocs()">
/// </seealso>
public interface TermDocs
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermEnum.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermEnum.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermEnum.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermEnum.cs Sat Jun 3 19:41:13 2006
@@ -13,7 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermFreqVector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs Sat Jun 3 19:41:13 2006
@@ -13,17 +13,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
/// <summary>Provides access to stored term vector of
- /// a document Field.
+ /// a document field.
/// </summary>
public interface TermFreqVector
{
/// <summary> </summary>
- /// <returns> The Field this vector is associated with.
+ /// <returns> The field this vector is associated with.
///
/// </returns>
System.String GetField();
@@ -40,7 +42,7 @@
/// <summary>Array of term frequencies. Locations of the array correspond one to one
/// to the terms in the array obtained from <code>getTerms</code>
/// method. Each location in the array contains the number of times this
- /// term occurs in the document or the document Field.
+ /// term occurs in the document or the document field.
/// </summary>
int[] GetTermFrequencies();
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfo.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfo.cs Sat Jun 3 19:41:13 2006
@@ -13,11 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
- /// <summary>A TermInfo is the record of information stored for a term.</summary>
- sealed public class TermInfo
+
+ /// <summary>A TermInfo is the record of information stored for a term.</summary>
+
+ public sealed class TermInfo
{
/// <summary>The number of documents which contain the term. </summary>
public /*internal*/ int docFreq = 0;
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfosReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs Sat Jun 3 19:41:13 2006
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Directory = Lucene.Net.Store.Directory;
+
namespace Lucene.Net.Index
{
@@ -23,7 +25,7 @@
/// set.
/// </summary>
- sealed public class TermInfosReader
+ public sealed class TermInfosReader
{
private Directory directory;
private System.String segment;
@@ -33,15 +35,34 @@
private SegmentTermEnum origEnum;
private long size;
+ private Term[] indexTerms = null;
+ private TermInfo[] indexInfos;
+ private long[] indexPointers;
+
+ private SegmentTermEnum indexEnum;
+
public /*internal*/ TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
{
directory = dir;
segment = seg;
fieldInfos = fis;
- origEnum = new SegmentTermEnum(directory.OpenFile(segment + ".tis"), fieldInfos, false);
+ origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
size = origEnum.size;
- ReadIndex();
+
+ indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);
+ }
+
+ ~TermInfosReader()
+ {
+ try
+ {
+ // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+ System.Threading.Thread.SetData(enumerators, null); // {{Aroush-1.9}} is this required for .NET ?!
+ }
+ catch (Exception)
+ {
+ }
}
public int GetSkipInterval()
@@ -53,6 +74,8 @@
{
if (origEnum != null)
origEnum.Close();
+ if (indexEnum != null)
+ indexEnum.Close();
}
/// <summary>Returns the number of term/value pairs in the set. </summary>
@@ -72,31 +95,33 @@
return termEnum;
}
- internal Term[] indexTerms = null;
- internal TermInfo[] indexInfos;
- internal long[] indexPointers;
-
- private void ReadIndex()
+ private void EnsureIndexIsRead()
{
- SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenFile(segment + ".tii"), fieldInfos, true);
- try
+ lock (this)
{
- int indexSize = (int) indexEnum.size;
-
- indexTerms = new Term[indexSize];
- indexInfos = new TermInfo[indexSize];
- indexPointers = new long[indexSize];
-
- for (int i = 0; indexEnum.Next(); i++)
+ if (indexTerms != null)
+ // index already read
+ return ; // do nothing
+ try
{
- indexTerms[i] = indexEnum.Term();
- indexInfos[i] = indexEnum.TermInfo();
- indexPointers[i] = indexEnum.indexPointer;
+ int indexSize = (int) indexEnum.size; // otherwise read index
+
+ indexTerms = new Term[indexSize];
+ indexInfos = new TermInfo[indexSize];
+ indexPointers = new long[indexSize];
+
+ for (int i = 0; indexEnum.Next(); i++)
+ {
+ indexTerms[i] = indexEnum.Term();
+ indexInfos[i] = indexEnum.TermInfo();
+ indexPointers[i] = indexEnum.indexPointer;
+ }
+ }
+ finally
+ {
+ indexEnum.Close();
+ indexEnum = null;
}
- }
- finally
- {
- indexEnum.Close();
}
}
@@ -131,9 +156,11 @@
if (size == 0)
return null;
+ EnsureIndexIsRead();
+
// optimize sequential access: first try scanning cached enum w/o seeking
SegmentTermEnum enumerator = GetEnum();
- if (enumerator.Term() != null && ((enumerator.prev != null && term.CompareTo(enumerator.prev) > 0) || term.CompareTo(enumerator.Term()) >= 0))
+ if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
{
int enumOffset = (int) (enumerator.position / enumerator.indexInterval) + 1;
if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
@@ -149,9 +176,7 @@
private TermInfo ScanEnum(Term term)
{
SegmentTermEnum enumerator = GetEnum();
- while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
- {
- }
+ enumerator.ScanTo(term);
if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
return enumerator.TermInfo();
else
@@ -188,6 +213,7 @@
if (size == 0)
return - 1;
+ EnsureIndexIsRead();
int indexOffset = GetIndexOffset(term);
SeekEnum(indexOffset);
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfosWriter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs Sat Jun 3 19:41:13 2006
@@ -13,10 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Directory = Lucene.Net.Store.Directory;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
using StringHelper = Lucene.Net.Util.StringHelper;
+
namespace Lucene.Net.Index
{
@@ -24,13 +26,13 @@
/// Directory. A TermInfos can be written once, in order.
/// </summary>
- sealed public class TermInfosWriter
+ public sealed class TermInfosWriter
{
/// <summary>The file format version, a negative number. </summary>
public const int FORMAT = - 2;
private FieldInfos fieldInfos;
- private OutputStream output;
+ private IndexOutput output;
private Term lastTerm = new Term("", "");
private TermInfo lastTi = new TermInfo();
private long size = 0;
@@ -63,23 +65,24 @@
private TermInfosWriter other = null;
- public /*internal*/ TermInfosWriter(Directory directory, System.String segment, FieldInfos fis)
+ public /*internal*/ TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
{
- Initialize(directory, segment, fis, false);
- other = new TermInfosWriter(directory, segment, fis, true);
+ Initialize(directory, segment, fis, interval, false);
+ other = new TermInfosWriter(directory, segment, fis, interval, true);
other.other = this;
}
- private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, bool isIndex)
+ private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
{
- Initialize(directory, segment, fis, isIndex);
+ Initialize(directory, segment, fis, interval, isIndex);
}
- private void Initialize(Directory directory, System.String segment, FieldInfos fis, bool isi)
+ private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
{
+ indexInterval = interval;
fieldInfos = fis;
isIndex = isi;
- output = directory.CreateFile(segment + (isIndex?".tii":".tis"));
+ output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis"));
output.WriteInt(FORMAT); // write format
output.WriteLong(0); // leave space for size
output.WriteInt(indexInterval); // write indexInterval
@@ -131,7 +134,7 @@
output.WriteVInt(length); // write delta length
output.WriteChars(term.text, start, length); // write delta chars
- output.WriteVInt(fieldInfos.FieldNumber(term.field)); // write Field num
+ output.WriteVInt(fieldInfos.FieldNumber(term.field)); // write field num
lastTerm = term;
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositionVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermPositionVector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositionVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositionVector.cs Sat Jun 3 19:41:13 2006
@@ -13,20 +13,37 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
/// <summary>Extends <code>TermFreqVector</code> to provide additional information about
- /// positions in which each of the terms is found.
+ /// positions in which each of the terms is found. A TermPositionVector not necessarily
+ /// contains both positions and offsets, but at least one of these arrays exists.
/// </summary>
- public interface TermPositionVector:TermFreqVector
+ public interface TermPositionVector : TermFreqVector
{
/// <summary>Returns an array of positions in which the term is found.
/// Terms are identified by the index at which its number appears in the
- /// term number array obtained from <code>getTermNumbers</code> method.
+ /// term String array obtained from the <code>indexOf</code> method.
+ /// May return null if positions have not been stored.
/// </summary>
int[] GetTermPositions(int index);
+
+ /// <summary> Returns an array of TermVectorOffsetInfo in which the term is found.
+ /// May return null if offsets have not been stored.
+ ///
+ /// </summary>
+ /// <seealso cref="Lucene.Net.analysis.Token">
+ ///
+ /// </seealso>
+ /// <param name="index">The position in the array to get the offsets from
+ /// </param>
+ /// <returns> An array of TermVectorOffsetInfo objects or the empty list
+ /// </returns>
+ TermVectorOffsetInfo[] GetOffsets(int index);
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermPositions.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs Sat Jun 3 19:41:13 2006
@@ -13,7 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
@@ -23,10 +25,10 @@
/// positions of each occurrence of a term in a document.
///
/// </summary>
- /// <seealso cref="IndexReader#termPositions">
+ /// <seealso cref="IndexReader.TermPositions()">
/// </seealso>
- public interface TermPositions:TermDocs
+ public interface TermPositions : TermDocs
{
/// <summary>Returns next position in the current document. It is an error to call
/// this more than {@link #Freq()} times
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorOffsetInfo.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ public class TermVectorOffsetInfo
+ {
+ public static readonly TermVectorOffsetInfo[] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0];
+ private int startOffset;
+ private int endOffset;
+
+ public TermVectorOffsetInfo()
+ {
+ }
+
+ public TermVectorOffsetInfo(int startOffset, int endOffset)
+ {
+ this.endOffset = endOffset;
+ this.startOffset = startOffset;
+ }
+
+ public virtual int GetEndOffset()
+ {
+ return endOffset;
+ }
+
+ public virtual void SetEndOffset(int endOffset)
+ {
+ this.endOffset = endOffset;
+ }
+
+ public virtual int GetStartOffset()
+ {
+ return startOffset;
+ }
+
+ public virtual void SetStartOffset(int startOffset)
+ {
+ this.startOffset = startOffset;
+ }
+
+ public override bool Equals(System.Object o)
+ {
+ if (this == o)
+ return true;
+ if (!(o is TermVectorOffsetInfo))
+ return false;
+
+ TermVectorOffsetInfo termVectorOffsetInfo = (TermVectorOffsetInfo) o;
+
+ if (endOffset != termVectorOffsetInfo.endOffset)
+ return false;
+ if (startOffset != termVectorOffsetInfo.startOffset)
+ return false;
+
+ return true;
+ }
+
+ public override int GetHashCode()
+ {
+ int result;
+ result = startOffset;
+ result = 29 * result + endOffset;
+ return result;
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs Sat Jun 3 19:41:13 2006
@@ -13,59 +13,92 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
namespace Lucene.Net.Index
{
- /// <summary>TODO: relax synchro!</summary>
- public class TermVectorsReader
+ /// <version> $Id: TermVectorsReader.java 170226 2005-05-15 15:04:39Z bmesser $
+ /// </version>
+ public class TermVectorsReader : System.ICloneable
{
private FieldInfos fieldInfos;
- private InputStream tvx;
- private InputStream tvd;
- private InputStream tvf;
+ private IndexInput tvx;
+ private IndexInput tvd;
+ private IndexInput tvf;
private int size;
+ private int tvdFormat;
+ private int tvfFormat;
+
public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
{
if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
{
- tvx = d.OpenFile(segment + TermVectorsWriter.TVX_EXTENSION);
+ tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION);
CheckValidFormat(tvx);
- tvd = d.OpenFile(segment + TermVectorsWriter.TVD_EXTENSION);
- CheckValidFormat(tvd);
- tvf = d.OpenFile(segment + TermVectorsWriter.TVF_EXTENSION);
- CheckValidFormat(tvf);
+ tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION);
+ tvdFormat = CheckValidFormat(tvd);
+ tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION);
+ tvfFormat = CheckValidFormat(tvf);
size = (int) tvx.Length() / 8;
}
this.fieldInfos = fieldInfos;
}
- private void CheckValidFormat(InputStream in_Renamed)
+ private int CheckValidFormat(IndexInput in_Renamed)
{
int format = in_Renamed.ReadInt();
if (format > TermVectorsWriter.FORMAT_VERSION)
{
throw new System.IO.IOException("Incompatible format version: " + format + " expected " + TermVectorsWriter.FORMAT_VERSION + " or less");
}
+ return format;
}
internal virtual void Close()
{
- lock (this)
- {
- // why don't we trap the exception and at least make sure that
- // all streams that we can close are closed?
- if (tvx != null)
+ // make all effort to close up. Keep the first exception
+ // and throw it as a new one.
+ System.IO.IOException keep = null;
+ if (tvx != null)
+ try
+ {
tvx.Close();
- if (tvd != null)
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (tvd != null)
+ try
+ {
tvd.Close();
- if (tvf != null)
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (tvf != null)
+ try
+ {
tvf.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (keep == null)
+ keep = e;
+ }
+ if (keep != null)
+ {
+ throw new System.IO.IOException(keep.StackTrace);
}
}
@@ -77,130 +110,125 @@
return size;
}
- /// <summary> Retrieve the term vector for the given document and Field</summary>
+ /// <summary> Retrieve the term vector for the given document and field</summary>
/// <param name="docNum">The document number to retrieve the vector for
/// </param>
- /// <param name="Field">The Field within the document to retrieve
+ /// <param name="field">The field within the document to retrieve
/// </param>
- /// <returns> The TermFreqVector for the document and Field or null
+ /// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
/// </returns>
+ /// <throws> IOException if there is an error reading the term vector files </throws>
public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
{
- lock (this)
+ // Check if no term vectors are available for this segment at all
+ int fieldNumber = fieldInfos.FieldNumber(field);
+ TermFreqVector result = null;
+ if (tvx != null)
{
- // Check if no term vectors are available for this segment at all
- int fieldNumber = fieldInfos.FieldNumber(field);
- TermFreqVector result = null;
- if (tvx != null)
+ //We need to account for the FORMAT_SIZE at when seeking in the tvx
+ //We don't need to do this in other seeks because we already have the
+ // file pointer
+ //that was written in another file
+ tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+ //System.out.println("TVX Pointer: " + tvx.getFilePointer());
+ long position = tvx.ReadLong();
+
+ tvd.Seek(position);
+ int fieldCount = tvd.ReadVInt();
+ //System.out.println("Num Fields: " + fieldCount);
+ // There are only a few fields per document. We opt for a full scan
+ // rather than requiring that they be ordered. We need to read through
+ // all of the fields anyway to get to the tvf pointers.
+ int number = 0;
+ int found = - 1;
+ for (int i = 0; i < fieldCount; i++)
{
- try
- {
- //We need to account for the FORMAT_SIZE at when seeking in the tvx
- //We don't need to do this in other seeks because we already have the file pointer
- //that was written in another file
- tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
- //System.out.println("TVX Pointer: " + tvx.getFilePointer());
- long position = tvx.ReadLong();
-
- tvd.Seek(position);
- int fieldCount = tvd.ReadVInt();
- //System.out.println("Num Fields: " + fieldCount);
- // There are only a few fields per document. We opt for a full scan
- // rather then requiring that they be ordered. We need to read through
- // all of the fields anyway to get to the tvf pointers.
- int number = 0;
- int found = - 1;
- for (int i = 0; i < fieldCount; i++)
- {
- number += tvd.ReadVInt();
- if (number == fieldNumber)
- found = i;
- }
-
- // This Field, although valid in the segment, was not found in this document
- if (found != - 1)
- {
- // Compute position in the tvf file
- position = 0;
- for (int i = 0; i <= found; i++)
- {
- position += tvd.ReadVLong();
- }
- result = ReadTermVector(field, position);
- }
- else
- {
- //System.out.println("Field not found");
- }
- }
- catch (System.Exception e)
- {
- //System.Console.Out.WriteLine(e.StackTrace);
- }
+ if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
+ number = tvd.ReadVInt();
+ else
+ number += tvd.ReadVInt();
+
+ if (number == fieldNumber)
+ found = i;
+ }
+
+ // This field, although valid in the segment, was not found in this
+ // document
+ if (found != - 1)
+ {
+ // Compute position in the tvf file
+ position = 0;
+ for (int i = 0; i <= found; i++)
+ position += tvd.ReadVLong();
+
+ result = ReadTermVector(field, position);
}
else
{
- System.Console.Out.WriteLine("No tvx file");
+ //System.out.println("Field not found");
}
- return result;
}
+ else
+ {
+ //System.out.println("No tvx file");
+ }
+ return result;
}
-
- /// <summary>Return all term vectors stored for this document or null if the could not be read in. </summary>
- internal virtual TermFreqVector[] Get(int docNum)
+ /// <summary> Return all term vectors stored for this document or null if they could not be read in.
+ ///
+ /// </summary>
+ /// <param name="docNum">The document number to retrieve the vector for
+ /// </param>
+ /// <returns> All term frequency vectors
+ /// </returns>
+ /// <throws> IOException if there is an error reading the term vector files </throws>
+ public /*internal*/ virtual TermFreqVector[] Get(int docNum)
{
- lock (this)
+ TermFreqVector[] result = null;
+ // Check if no term vectors are available for this segment at all
+ if (tvx != null)
{
- TermFreqVector[] result = null;
- // Check if no term vectors are available for this segment at all
- if (tvx != null)
+ //We need to offset by
+ tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+ long position = tvx.ReadLong();
+
+ tvd.Seek(position);
+ int fieldCount = tvd.ReadVInt();
+
+ // No fields are vectorized for this document
+ if (fieldCount != 0)
{
- try
+ int number = 0;
+ System.String[] fields = new System.String[fieldCount];
+
+ for (int i = 0; i < fieldCount; i++)
{
- //We need to offset by
- tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
- long position = tvx.ReadLong();
-
- tvd.Seek(position);
- int fieldCount = tvd.ReadVInt();
+ if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
+ number = tvd.ReadVInt();
+ else
+ number += tvd.ReadVInt();
- // No fields are vectorized for this document
- if (fieldCount != 0)
- {
- int number = 0;
- System.String[] fields = new System.String[fieldCount];
-
- for (int i = 0; i < fieldCount; i++)
- {
- number += tvd.ReadVInt();
- fields[i] = fieldInfos.FieldName(number);
- }
-
- // Compute position in the tvf file
- position = 0;
- long[] tvfPointers = new long[fieldCount];
- for (int i = 0; i < fieldCount; i++)
- {
- position += tvd.ReadVLong();
- tvfPointers[i] = position;
- }
-
- result = ReadTermVectors(fields, tvfPointers);
- }
+ fields[i] = fieldInfos.FieldName(number);
}
- catch (System.IO.IOException e)
+
+ // Compute position in the tvf file
+ position = 0;
+ long[] tvfPointers = new long[fieldCount];
+ for (int i = 0; i < fieldCount; i++)
{
- Console.Error.Write(e.StackTrace);
- Console.Error.Flush();
- }
- }
- else
- {
- System.Console.Out.WriteLine("No tvx file");
+ position += tvd.ReadVLong();
+ tvfPointers[i] = position;
+ }
+
+ result = ReadTermVectors(fields, tvfPointers);
}
- return result;
}
+ else
+ {
+ //System.out.println("No tvx file");
+ }
+ return result;
}
@@ -215,7 +243,7 @@
}
/// <summary> </summary>
- /// <param name="fieldNum">The Field to read in
+ /// <param name="field">The field to read in
/// </param>
/// <param name="tvfPointer">The pointer within the tvf file where we should start reading
/// </param>
@@ -231,21 +259,43 @@
int numTerms = tvf.ReadVInt();
//System.out.println("Num Terms: " + numTerms);
- // If no terms - return a constant empty termvector
+ // If no terms - return a constant empty termvector. However, this should never occur!
if (numTerms == 0)
return new SegmentTermVector(field, null, null);
- int length = numTerms + tvf.ReadVInt();
+ bool storePositions;
+ bool storeOffsets;
- System.String[] terms = new System.String[numTerms];
+ if (tvfFormat == TermVectorsWriter.FORMAT_VERSION)
+ {
+ byte bits = tvf.ReadByte();
+ storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
+ }
+ else
+ {
+ tvf.ReadVInt();
+ storePositions = false;
+ storeOffsets = false;
+ }
+ System.String[] terms = new System.String[numTerms];
int[] termFreqs = new int[numTerms];
+ // we may not need these, but declare them
+ int[][] positions = null;
+ TermVectorOffsetInfo[][] offsets = null;
+ if (storePositions)
+ positions = new int[numTerms][];
+ if (storeOffsets)
+ offsets = new TermVectorOffsetInfo[numTerms][];
+
int start = 0;
int deltaLength = 0;
int totalLength = 0;
- char[] buffer = new char[]{};
- System.String previousString = "";
+ char[] buffer = new char[10]; // init the buffer with a length of 10 characters
+ char[] previousBuffer = new char[]{};
+
for (int i = 0; i < numTerms; i++)
{
start = tvf.ReadVInt();
@@ -253,18 +303,81 @@
totalLength = start + deltaLength;
if (buffer.Length < totalLength)
{
+ // increase buffer
+ buffer = null; // give a hint to garbage collector
buffer = new char[totalLength];
- for (int j = 0; j < previousString.Length; j++)
- // copy contents
- buffer[j] = previousString[j];
+
+ if (start > 0)
+ // just copy if necessary
+ Array.Copy(previousBuffer, 0, buffer, 0, start);
}
+
tvf.ReadChars(buffer, start, deltaLength);
terms[i] = new System.String(buffer, 0, totalLength);
- previousString = terms[i];
- termFreqs[i] = tvf.ReadVInt();
+ previousBuffer = buffer;
+ int freq = tvf.ReadVInt();
+ termFreqs[i] = freq;
+
+ if (storePositions)
+ {
+ //read in the positions
+ int[] pos = new int[freq];
+ positions[i] = pos;
+ int prevPosition = 0;
+ for (int j = 0; j < freq; j++)
+ {
+ pos[j] = prevPosition + tvf.ReadVInt();
+ prevPosition = pos[j];
+ }
+ }
+
+ if (storeOffsets)
+ {
+ TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
+ offsets[i] = offs;
+ int prevOffset = 0;
+ for (int j = 0; j < freq; j++)
+ {
+ int startOffset = prevOffset + tvf.ReadVInt();
+ int endOffset = startOffset + tvf.ReadVInt();
+ offs[j] = new TermVectorOffsetInfo(startOffset, endOffset);
+ prevOffset = endOffset;
+ }
+ }
+ }
+
+ SegmentTermVector tv;
+ if (storePositions || storeOffsets)
+ {
+ tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
+ }
+ else
+ {
+ tv = new SegmentTermVector(field, terms, termFreqs);
}
- SegmentTermVector tv = new SegmentTermVector(field, terms, termFreqs);
return tv;
+ }
+
+ public virtual System.Object Clone()
+ {
+
+ if (tvx == null || tvd == null || tvf == null)
+ return null;
+
+ TermVectorsReader clone = null;
+ try
+ {
+ clone = (TermVectorsReader) base.MemberwiseClone();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ clone.tvx = (IndexInput) tvx.Clone();
+ clone.tvd = (IndexInput) tvd.Clone();
+ clone.tvf = (IndexInput) tvf.Clone();
+
+ return clone;
}
}
}