You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2006/06/04 04:41:25 UTC
svn commit: r411501 [9/30] - in /incubator/lucene.net/trunk/C#/src: ./
Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/
Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/
Lucene.Net/Analysis/Standard/ Lucene.Net/Docum...
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldInfos.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs Sat Jun 3 19:41:13 2006
@@ -13,12 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
namespace Lucene.Net.Index
{
@@ -28,31 +30,33 @@
/// be adding documents at a time, with no other reader or writer threads
/// accessing this object.
/// </summary>
- sealed public class FieldInfos
+ public sealed class FieldInfos
{
+
+ internal const byte IS_INDEXED = (byte) (0x1);
+ internal const byte STORE_TERMVECTOR = (byte) (0x2);
+ internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x4);
+ internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x8);
+ internal const byte OMIT_NORMS = (byte) (0x10);
+
private System.Collections.ArrayList byNumber = new System.Collections.ArrayList();
private System.Collections.Hashtable byName = new System.Collections.Hashtable();
public /*internal*/ FieldInfos()
{
- Add("", false);
}
/// <summary> Construct a FieldInfos object using the directory and the name of the file
- /// InputStream
+ /// IndexInput
/// </summary>
- /// <param name="d">The directory to open the InputStream from
+ /// <param name="d">The directory to open the IndexInput from
/// </param>
- /// <param name="name">The name of the file to open the InputStream from in the Directory
+ /// <param name="name">The name of the file to open the IndexInput from in the Directory
/// </param>
/// <throws> IOException </throws>
- /// <summary>
- /// </summary>
- /// <seealso cref="#read">
- /// </seealso>
public /*internal*/ FieldInfos(Directory d, System.String name)
{
- InputStream input = d.OpenFile(name);
+ IndexInput input = d.OpenInput(name);
try
{
Read(input);
@@ -63,79 +67,132 @@
}
}
- /// <summary>Adds Field info for a Document. </summary>
+ /// <summary>Adds field info for a Document. </summary>
public void Add(Document doc)
{
- foreach (Field field in doc.Fields())
- {
- Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored());
- }
+ System.Collections.IEnumerator fields = doc.Fields();
+ while (fields.MoveNext())
+ {
+ Field field = (Field) fields.Current;
+ Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
+ }
}
+ /// <summary> Add fields that are indexed. Whether they have termvectors has to be specified.
+ ///
+ /// </summary>
/// <param name="names">The names of the fields
/// </param>
/// <param name="storeTermVectors">Whether the fields store term vectors or not
/// </param>
- public void AddIndexed(System.Collections.ICollection names, bool storeTermVectors)
+ /// <param name="storePositionWithTermVector">true if positions should be stored.
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if offsets should be stored
+ /// </param>
+ public void AddIndexed(System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
System.Collections.IEnumerator i = names.GetEnumerator();
- int j = 0;
while (i.MoveNext())
{
- System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry) i.Current;
- Add((System.String) t.Key, true, storeTermVectors);
+ System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry) i.Current;
+ Add((System.String) t.Key, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
}
}
- /// <summary> Assumes the Field is not storing term vectors </summary>
+ /// <summary> Assumes the fields are not storing term vectors.
+ ///
+ /// </summary>
/// <param name="names">The names of the fields
/// </param>
/// <param name="isIndexed">Whether the fields are indexed or not
///
/// </param>
- /// <seealso cref="boolean)">
+ /// <seealso cref="Add(String, boolean)">
/// </seealso>
public void Add(System.Collections.ICollection names, bool isIndexed)
{
System.Collections.IEnumerator i = names.GetEnumerator();
- int j = 0;
while (i.MoveNext())
{
- System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry) i.Current;
- Add((System.String) t.Key, isIndexed);
+ System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry) i.Current;
+ Add((System.String) t.Key, isIndexed);
}
}
- /// <summary> Calls three parameter add with false for the storeTermVector parameter </summary>
+ /// <summary> Calls the six parameter add with false for all TermVector parameters and omitNorms.
+ ///
+ /// </summary>
/// <param name="name">The name of the Field
/// </param>
- /// <param name="isIndexed">true if the Field is indexed
+ /// <param name="isIndexed">true if the field is indexed
/// </param>
- /// <seealso cref="boolean, boolean)">
+ /// <seealso cref="Add(String, boolean, boolean, boolean, boolean)">
/// </seealso>
public void Add(System.String name, bool isIndexed)
{
- Add(name, isIndexed, false);
+ Add(name, isIndexed, false, false, false, false);
}
+ /// <summary> Calls the six parameter add with false for term vector positions, offsets and omitNorms.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="isIndexed"> true if the field is indexed
+ /// </param>
+ /// <param name="storeTermVector">true if the term vector should be stored
+ /// </param>
+ public void Add(System.String name, bool isIndexed, bool storeTermVector)
+ {
+ Add(name, isIndexed, storeTermVector, false, false, false);
+ }
- /// <summary>If the Field is not yet known, adds it. If it is known, checks to make
+ /// <summary>If the field is not yet known, adds it. If it is known, checks to make
/// sure that the isIndexed flag is the same as was given previously for this
- /// Field. If not - marks it as being indexed. Same goes for storeTermVector
+ /// field. If not - marks it as being indexed. Same goes for the TermVector
+ /// parameters.
///
/// </summary>
- /// <param name="name">The name of the Field
+ /// <param name="name">The name of the field
/// </param>
- /// <param name="isIndexed">true if the Field is indexed
+ /// <param name="isIndexed">true if the field is indexed
/// </param>
/// <param name="storeTermVector">true if the term vector should be stored
/// </param>
- public void Add(System.String name, bool isIndexed, bool storeTermVector)
+ /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
+ /// </param>
+ public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+ {
+
+ Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ }
+
+ /// <summary>If the field is not yet known, adds it. If it is known, checks to make
+ /// sure that the isIndexed flag is the same as was given previously for this
+ /// field. If not - marks it as being indexed. Same goes for the TermVector
+ /// parameters.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="isIndexed">true if the field is indexed
+ /// </param>
+ /// <param name="storeTermVector">true if the term vector should be stored
+ /// </param>
+ /// <param name="storePositionWithTermVector">true if the term vector with positions should be stored
+ /// </param>
+ /// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
+ /// </param>
+ /// <param name="omitNorms">true if the norms for the indexed field should be omitted
+ /// </param>
+ public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
{
FieldInfo fi = FieldInfo(name);
if (fi == null)
{
- AddInternal(name, isIndexed, storeTermVector);
+ AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
}
else
{
@@ -147,23 +204,42 @@
{
fi.storeTermVector = true; // once vector, always vector
}
+ if (fi.storePositionWithTermVector != storePositionWithTermVector)
+ {
+ fi.storePositionWithTermVector = true; // once vector, always vector
+ }
+ if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector)
+ {
+ fi.storeOffsetWithTermVector = true; // once vector, always vector
+ }
+ if (fi.omitNorms != omitNorms)
+ {
+ fi.omitNorms = false; // once norms are stored, always store
+ }
}
}
- private void AddInternal(System.String name, bool isIndexed, bool storeTermVector)
+
+ private void AddInternal(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
{
- FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector);
+ FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
byNumber.Add(fi);
byName[name] = fi;
}
public int FieldNumber(System.String fieldName)
{
- FieldInfo fi = FieldInfo(fieldName);
- if (fi != null)
- return fi.number;
- else
+ try
+ {
+ FieldInfo fi = FieldInfo(fieldName);
+ if (fi != null)
+ return fi.number;
+ }
+ catch (System.IndexOutOfRangeException ioobe)
+ {
return - 1;
+ }
+ return - 1;
}
public FieldInfo FieldInfo(System.String fieldName)
@@ -171,14 +247,42 @@
return (FieldInfo) byName[fieldName];
}
+ /// <summary> Return the fieldName identified by its number.
+ ///
+ /// </summary>
+ /// <param name="fieldNumber">
+ /// </param>
+ /// <returns> the fieldName or an empty string when the field
+ /// with the given number doesn't exist.
+ /// </returns>
public System.String FieldName(int fieldNumber)
{
- return FieldInfo(fieldNumber).name;
+ try
+ {
+ return FieldInfo(fieldNumber).name;
+ }
+ catch (System.NullReferenceException)
+ {
+ return "";
+ }
}
+ /// <summary> Return the fieldinfo object referenced by the fieldNumber.</summary>
+ /// <param name="fieldNumber">
+ /// </param>
+ /// <returns> the FieldInfo object or null when the given fieldNumber
+ /// doesn't exist.
+ /// </returns>
public FieldInfo FieldInfo(int fieldNumber)
{
- return (FieldInfo) byNumber[fieldNumber];
+ try
+ {
+ return (FieldInfo) byNumber[fieldNumber];
+ }
+ catch (System.ArgumentOutOfRangeException) // (System.IndexOutOfRangeException)
+ {
+ return null;
+ }
}
public int Size()
@@ -192,14 +296,17 @@
for (int i = 0; i < Size(); i++)
{
if (FieldInfo(i).storeTermVector)
+ {
hasVectors = true;
+ break;
+ }
}
return hasVectors;
}
public void Write(Directory d, System.String name)
{
- OutputStream output = d.CreateFile(name);
+ IndexOutput output = d.CreateOutput(name);
try
{
Write(output);
@@ -210,7 +317,7 @@
}
}
- public void Write(OutputStream output)
+ public void Write(IndexOutput output)
{
output.WriteVInt(Size());
for (int i = 0; i < Size(); i++)
@@ -218,26 +325,34 @@
FieldInfo fi = FieldInfo(i);
byte bits = (byte) (0x0);
if (fi.isIndexed)
- bits |= (byte) (0x1);
+ bits |= IS_INDEXED;
if (fi.storeTermVector)
- bits |= (byte) (0x2);
+ bits |= STORE_TERMVECTOR;
+ if (fi.storePositionWithTermVector)
+ bits |= STORE_POSITIONS_WITH_TERMVECTOR;
+ if (fi.storeOffsetWithTermVector)
+ bits |= STORE_OFFSET_WITH_TERMVECTOR;
+ if (fi.omitNorms)
+ bits |= OMIT_NORMS;
output.WriteString(fi.name);
- //Was REMOVE
- //output.writeByte((byte)(fi.isIndexed ? 1 : 0));
output.WriteByte(bits);
}
}
- private void Read(InputStream input)
+ private void Read(IndexInput input)
{
int size = input.ReadVInt(); //read in the size
for (int i = 0; i < size; i++)
{
System.String name = String.Intern(input.ReadString());
byte bits = input.ReadByte();
- bool isIndexed = (bits & 0x1) != 0;
- bool storeTermVector = (bits & 0x2) != 0;
- AddInternal(name, isIndexed, storeTermVector);
+ bool isIndexed = (bits & IS_INDEXED) != 0;
+ bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
+ bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+ bool omitNorms = (bits & OMIT_NORMS) != 0;
+
+ AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldsReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs Sat Jun 3 19:41:13 2006
@@ -13,11 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
namespace Lucene.Net.Index
{
@@ -26,21 +28,21 @@
/// It uses <segment>.fdt and <segment>.fdx; files.
///
/// </summary>
- /// <version> $Id: FieldsReader.java,v 1.7 2004/03/29 22:48:02 cutting Exp $
+ /// <version> $Id: FieldsReader.java 329524 2005-10-30 05:38:46Z yonik $
/// </version>
- sealed public class FieldsReader
+ public sealed class FieldsReader
{
private FieldInfos fieldInfos;
- private InputStream fieldsStream;
- private InputStream indexStream;
+ private IndexInput fieldsStream;
+ private IndexInput indexStream;
private int size;
public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn)
{
fieldInfos = fn;
- fieldsStream = d.OpenFile(segment + ".fdt");
- indexStream = d.OpenFile(segment + ".fdx");
+ fieldsStream = d.OpenInput(segment + ".fdt");
+ indexStream = d.OpenInput(segment + ".fdx");
size = (int) (indexStream.Length() / 8);
}
@@ -71,10 +73,82 @@
byte bits = fieldsStream.ReadByte();
- doc.Add(new Field(fi.name, fieldsStream.ReadString(), true, fi.isIndexed, (bits & 1) != 0, fi.storeTermVector)); // vector
+ bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
+ bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
+
+ if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
+ {
+ byte[] b = new byte[fieldsStream.ReadVInt()];
+ fieldsStream.ReadBytes(b, 0, b.Length);
+ if (compressed)
+ doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
+ else
+ doc.Add(new Field(fi.name, b, Field.Store.YES));
+ }
+ else
+ {
+ Field.Index index;
+ Field.Store store = Field.Store.YES;
+
+ if (fi.isIndexed && tokenize)
+ index = Field.Index.TOKENIZED;
+ else if (fi.isIndexed && !tokenize)
+ index = Field.Index.UN_TOKENIZED;
+ else
+ index = Field.Index.NO;
+
+ Field.TermVector termVector = null;
+ if (fi.storeTermVector)
+ {
+ if (fi.storeOffsetWithTermVector)
+ {
+ if (fi.storePositionWithTermVector)
+ {
+ termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
+ }
+ else
+ {
+ termVector = Field.TermVector.WITH_OFFSETS;
+ }
+ }
+ else if (fi.storePositionWithTermVector)
+ {
+ termVector = Field.TermVector.WITH_POSITIONS;
+ }
+ else
+ {
+ termVector = Field.TermVector.YES;
+ }
+ }
+ else
+ {
+ termVector = Field.TermVector.NO;
+ }
+
+ if (compressed)
+ {
+ store = Field.Store.COMPRESS;
+ byte[] b = new byte[fieldsStream.ReadVInt()];
+ fieldsStream.ReadBytes(b, 0, b.Length);
+ Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
+ f.SetOmitNorms(fi.omitNorms);
+ doc.Add(f);
+ }
+ else
+ {
+ Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
+ f.SetOmitNorms(fi.omitNorms);
+ doc.Add(f);
+ }
+ }
}
return doc;
+ }
+
+ private byte[] Uncompress(byte[] input)
+ {
+ return SupportClass.CompressionSupport.Uncompress(input);
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldsWriter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs Sat Jun 3 19:41:13 2006
@@ -13,25 +13,33 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
namespace Lucene.Net.Index
{
sealed class FieldsWriter
{
+ internal const byte FIELD_IS_TOKENIZED = (byte) (0x1);
+ internal const byte FIELD_IS_BINARY = (byte) (0x2);
+ internal const byte FIELD_IS_COMPRESSED = (byte) (0x4);
+
private FieldInfos fieldInfos;
- private OutputStream fieldsStream;
- private OutputStream indexStream;
+
+ private IndexOutput fieldsStream;
+
+ private IndexOutput indexStream;
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
fieldInfos = fn;
- fieldsStream = d.CreateFile(segment + ".fdt");
- indexStream = d.CreateFile(segment + ".fdx");
+ fieldsStream = d.CreateOutput(segment + ".fdt");
+ indexStream = d.CreateOutput(segment + ".fdx");
}
internal void Close()
@@ -45,27 +53,72 @@
indexStream.WriteLong(fieldsStream.GetFilePointer());
int storedCount = 0;
- foreach (Field field in doc.Fields())
- {
+ System.Collections.IEnumerator fields = doc.Fields();
+ while (fields.MoveNext())
+ {
+ Field field = (Field) fields.Current;
if (field.IsStored())
storedCount++;
}
fieldsStream.WriteVInt(storedCount);
- foreach (Field field in doc.Fields())
- {
+ fields = doc.Fields();
+ while (fields.MoveNext())
+ {
+ Field field = (Field) fields.Current;
if (field.IsStored())
{
fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));
byte bits = 0;
if (field.IsTokenized())
- bits |= 1;
+ bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+ if (field.IsBinary())
+ bits |= FieldsWriter.FIELD_IS_BINARY;
+ if (field.IsCompressed())
+ bits |= FieldsWriter.FIELD_IS_COMPRESSED;
+
fieldsStream.WriteByte(bits);
- fieldsStream.WriteString(field.StringValue());
+ if (field.IsCompressed())
+ {
+ // compression is enabled for the current field
+ byte[] data = null;
+ // check if it is a binary field
+ if (field.IsBinary())
+ {
+ data = Compress(field.BinaryValue());
+ }
+ else
+ {
+ data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
+ }
+ int len = data.Length;
+ fieldsStream.WriteVInt(len);
+ fieldsStream.WriteBytes(data, len);
+ }
+ else
+ {
+ // compression is disabled for the current field
+ if (field.IsBinary())
+ {
+ byte[] data = field.BinaryValue();
+ int len = data.Length;
+ fieldsStream.WriteVInt(len);
+ fieldsStream.WriteBytes(data, len);
+ }
+ else
+ {
+ fieldsStream.WriteString(field.StringValue());
+ }
+ }
}
}
+ }
+
+ private byte[] Compress(byte[] input)
+ {
+ return SupportClass.CompressionSupport.Compress(input);
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FilterIndexReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs Sat Jun 3 19:41:13 2006
@@ -13,8 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+
namespace Lucene.Net.Index
{
@@ -75,10 +78,10 @@
}
/// <summary>Base class for filtering {@link TermPositions} implementations. </summary>
- public class FilterTermPositions:FilterTermDocs, TermPositions
+ public class FilterTermPositions : FilterTermDocs, TermPositions
{
- public FilterTermPositions(TermPositions in_Renamed):base(in_Renamed)
+ public FilterTermPositions(TermPositions in_Renamed) : base(in_Renamed)
{
}
@@ -89,7 +92,7 @@
}
/// <summary>Base class for filtering {@link TermEnum} implementations. </summary>
- public class FilterTermEnum:TermEnum
+ public class FilterTermEnum : TermEnum
{
protected internal TermEnum in_Renamed;
@@ -167,6 +170,11 @@
in_Renamed.UndeleteAll();
}
+ public override bool HasNorms(System.String field)
+ {
+ return in_Renamed.HasNorms(field);
+ }
+
public override byte[] Norms(System.String f)
{
return in_Renamed.Norms(f);
@@ -227,15 +235,14 @@
return in_Renamed.GetFieldNames(indexed);
}
- /// <summary> </summary>
- /// <param name="storedTermVector">if true, returns only Indexed fields that have term vector info,
- /// else only indexed fields without term vector info
- /// </param>
- /// <returns> Collection of Strings indicating the names of the fields
- /// </returns>
- public override System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
+ public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
+ {
+ return in_Renamed.GetIndexedFieldNames(tvSpec);
+ }
+
+ public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
{
- return in_Renamed.GetIndexedFieldNames(storedTermVector);
+ return in_Renamed.GetFieldNames(fieldNames);
}
}
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexFileNameFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexFileNameFilter.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexFileNameFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexFileNameFilter.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Filename filter that accepts only filenames and extensions created by Lucene.
+ ///
+ /// </summary>
+ /// <author> Daniel Naber / Bernhard Messer
+ /// </author>
+ /// <version> $rcs = ' $Id: Exp $ ' ;
+ /// </version>
+ public class IndexFileNameFilter // : FilenameFilter {{Aroush-1.9}}
+ {
+
+ /* (non-Javadoc)
+ * @see java.io.FilenameFilter#accept(java.io.File, java.lang.String)
+ */
+ public virtual bool Accept(System.IO.FileInfo dir, System.String name)
+ {
+ for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
+ {
+ if (name.EndsWith("." + IndexFileNames.INDEX_EXTENSIONS[i]))
+ return true;
+ }
+ if (name.Equals(IndexFileNames.DELETABLE))
+ return true;
+ else if (name.Equals(IndexFileNames.SEGMENTS))
+ return true;
+ else if (true) // else if (name.Matches(".+\\.f\\d+")) // {{Aroush-1.9}}
+ return true;
+ return false;
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexFileNames.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexFileNames.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexFileNames.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexFileNames.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> Useful constants representing filenames and extensions used by lucene
+ ///
+ /// </summary>
+ /// <author> Bernhard Messer
+ /// </author>
+ /// <version> $rcs = ' $Id: Exp $ ' ;
+ /// </version>
+ sealed class IndexFileNames
+ {
+
+ /// <summary>Name of the index segment file </summary>
+ internal const System.String SEGMENTS = "segments";
+
+ /// <summary>Name of the index deletable file </summary>
+ internal const System.String DELETABLE = "deletable";
+
+ /// <summary> This array contains all filename extensions used by Lucene's index files, with
+ /// one exception, namely the extension made up from <code>.f</code> + a number.
+ /// Also note that two of Lucene's files (<code>deletable</code> and
+ /// <code>segments</code>) don't have any filename extension.
+ /// </summary>
+ internal static readonly System.String[] INDEX_EXTENSIONS = new System.String[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"};
+
+ /// <summary>File extensions of old-style index files </summary>
+ internal static readonly System.String[] COMPOUND_EXTENSIONS = new System.String[]{"fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"};
+
+ /// <summary>File extensions for term vector support </summary>
+ internal static readonly System.String[] VECTOR_EXTENSIONS = new System.String[]{"tvx", "tvd", "tvf"};
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexModifier.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexModifier.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
+using FSDirectory = Lucene.Net.Store.FSDirectory;
+
+namespace Lucene.Net.Index
+{
+
+ /// <summary> A class to modify an index, i.e. to delete and add documents. This
+ /// class hides {@link IndexReader} and {@link IndexWriter} so that you
+ /// do not need to care about implementation details such as that adding
+ /// documents is done via IndexWriter and deletion is done via IndexReader.
+ ///
+ /// <p>Note that you cannot create more than one <code>IndexModifier</code> object
+ /// on the same directory at the same time.
+ ///
+ /// <p>Example usage:
+ ///
+ /// <!-- ======================================================== -->
+ /// <!-- = Java Sourcecode to HTML automatically converted code = -->
+ /// <!-- = Java2Html Converter V4.1 2004 by Markus Gebhard markus@jave.de = -->
+ /// <!-- = Further information: http://www.java2html.de = -->
+ /// <div align="left" class="java">
+ /// <table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
+ /// <tr>
+ /// <!-- start source code -->
+ /// <td nowrap="nowrap" valign="top" align="left">
+ /// <code>
+ /// <font color="#ffffff"> </font><font color="#000000">Analyzer analyzer = </font><font color="#7f0055"><b>new </b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#3f7f5f">// create an index in /tmp/index, overwriting an existing one:</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">IndexModifier indexModifier = </font><font color="#7f0055"><b>new </b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">"/tmp/index"</font><font color="#000000">, analyzer, </font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">Document doc = </font><font color="#7f0055"><b>new </b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">, Field.Store.YES, Field.Index.UN_TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"body"</font><font color="#000000">, </font><font color="#2a00ff">"a simple test"</font><font color="#000000">, Field.Store.YES, Field.Index.TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#7f0055"><b>int </b></font><font color="#000000">deleted = indexModifier.delete</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">))</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">"Deleted " </font><font color="#000000">+ deleted + </font><font color="#2a00ff">" document"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.docCount</font><font color="#000000">() </font><font color="#000000">+ </font><font color="#2a00ff">" docs in index"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
+ /// <font color="#ffffff"> </font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
+ /// </td>
+ /// <!-- end source code -->
+ /// </tr>
+ /// </table>
+ /// </div>
+ /// <!-- = END of automatically generated HTML code = -->
+ /// <!-- ======================================================== -->
+ ///
+ /// <p>Not all methods of IndexReader and IndexWriter are offered by this
+ /// class. If you need access to additional methods, either use those classes
+ /// directly or implement your own class that extends <code>IndexModifier</code>.
+ ///
+ /// <p>Although an instance of this class can be used from more than one
+ /// thread, you will not get the best performance. You might want to use
+ /// IndexReader and IndexWriter directly for that (but you will need to
+ /// care about synchronization yourself then).
+ ///
+ /// <p>While you can freely mix calls to add() and delete() using this class,
+ /// you should batch you calls for best performance. For example, if you
+ /// want to update 20 documents, you should first delete all those documents,
+ /// then add all the new documents.
+ ///
+ /// </summary>
+ /// <author> Daniel Naber
+ /// </author>
+ public class IndexModifier
+ {
+ private void InitBlock()
+ {
+ maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
+ maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
+ mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
+ }
+
+ protected internal IndexWriter indexWriter = null;
+ protected internal IndexReader indexReader = null;
+
+ protected internal Directory directory = null;
+ protected internal Analyzer analyzer = null;
+ protected internal bool open = false;
+
+ // Lucene defaults:
+ protected internal System.IO.StreamWriter infoStream = null;
+ protected internal bool useCompoundFile = true;
+ protected internal int maxBufferedDocs;
+ protected internal int maxFieldLength;
+ protected internal int mergeFactor;
+
+ /// <summary> Open an index with write access.
+ ///
+ /// </summary>
+ /// <param name="directory">the index directory
+ /// </param>
+ /// <param name="analyzer">the analyzer to use for adding new documents
+ /// </param>
+ /// <param name="create"><code>true</code> to create the index or overwrite the existing one;
+ /// <code>false</code> to append to the existing index
+ /// </param>
+ public IndexModifier(Directory directory, Analyzer analyzer, bool create)
+ {
+ InitBlock();
+ Init(directory, analyzer, create);
+ }
+
+ /// <summary> Open an index with write access.
+ ///
+ /// </summary>
+ /// <param name="dirName">the index directory
+ /// </param>
+ /// <param name="analyzer">the analyzer to use for adding new documents
+ /// </param>
+ /// <param name="create"><code>true</code> to create the index or overwrite the existing one;
+ /// <code>false</code> to append to the existing index
+ /// </param>
+ public IndexModifier(System.String dirName, Analyzer analyzer, bool create)
+ {
+ InitBlock();
+ Directory dir = FSDirectory.GetDirectory(dirName, create);
+ Init(dir, analyzer, create);
+ }
+
+ /// <summary> Open an index with write access.
+ ///
+ /// </summary>
+ /// <param name="file">the index directory
+ /// </param>
+ /// <param name="analyzer">the analyzer to use for adding new documents
+ /// </param>
+ /// <param name="create"><code>true</code> to create the index or overwrite the existing one;
+ /// <code>false</code> to append to the existing index
+ /// </param>
+ public IndexModifier(System.IO.FileInfo file, Analyzer analyzer, bool create)
+ {
+ InitBlock();
+ Directory dir = FSDirectory.GetDirectory(file, create);
+ Init(dir, analyzer, create);
+ }
+
+ /// <summary> Initialize an IndexWriter.</summary>
+ /// <throws> IOException </throws>
+ protected internal virtual void Init(Directory directory, Analyzer analyzer, bool create)
+ {
+ this.directory = directory;
+ lock (this.directory)
+ {
+ this.analyzer = analyzer;
+ indexWriter = new IndexWriter(directory, analyzer, create);
+ open = true;
+ }
+ }
+
+ /// <summary> Throw an IllegalStateException if the index is closed.</summary>
+ /// <throws> IllegalStateException </throws>
+ protected internal virtual void AssureOpen()
+ {
+ if (!open)
+ {
+ throw new System.SystemException("Index is closed");
+ }
+ }
+
+ /// <summary> Close the IndexReader and open an IndexWriter.</summary>
+ /// <throws> IOException </throws>
+ protected internal virtual void CreateIndexWriter()
+ {
+ if (indexWriter == null)
+ {
+ if (indexReader != null)
+ {
+ indexReader.Close();
+ indexReader = null;
+ }
+ indexWriter = new IndexWriter(directory, analyzer, false);
+ indexWriter.SetInfoStream(infoStream);
+ indexWriter.SetUseCompoundFile(useCompoundFile);
+ indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
+ indexWriter.SetMaxFieldLength(maxFieldLength);
+ indexWriter.SetMergeFactor(mergeFactor);
+ }
+ }
+
+ /// <summary> Close the IndexWriter and open an IndexReader.</summary>
+ /// <throws> IOException </throws>
+ protected internal virtual void CreateIndexReader()
+ {
+ if (indexReader == null)
+ {
+ if (indexWriter != null)
+ {
+ indexWriter.Close();
+ indexWriter = null;
+ }
+ indexReader = IndexReader.Open(directory);
+ }
+ }
+
+ /// <summary> Make sure all changes are written to disk.</summary>
+ /// <throws> IOException </throws>
+ public virtual void Flush()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ indexWriter.Close();
+ indexWriter = null;
+ CreateIndexWriter();
+ }
+ else
+ {
+ indexReader.Close();
+ indexReader = null;
+ CreateIndexReader();
+ }
+ }
+ }
+
+ /// <summary> Adds a document to this index, using the provided analyzer instead of the
+ /// one specific in the constructor. If the document contains more than
+ /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
+ /// discarded.
+ /// </summary>
+ /// <seealso cref="IndexWriter.AddDocument(Document, Analyzer)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void AddDocument(Document doc, Analyzer docAnalyzer)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ if (docAnalyzer != null)
+ indexWriter.AddDocument(doc, docAnalyzer);
+ else
+ indexWriter.AddDocument(doc);
+ }
+ }
+
+ /// <summary> Adds a document to this index. If the document contains more than
+ /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
+ /// discarded.
+ /// </summary>
+ /// <seealso cref="IndexWriter.AddDocument(Document)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void AddDocument(Document doc)
+ {
+ AddDocument(doc, null);
+ }
+
+ /// <summary> Deletes all documents containing <code>term</code>.
+ /// This is useful if one uses a document field to hold a unique ID string for
+ /// the document. Then to delete such a document, one merely constructs a
+ /// term with the appropriate field and the unique ID string as its text and
+ /// passes it to this method. Returns the number of documents deleted.
+ /// </summary>
+ /// <returns> the number of documents deleted
+ /// </returns>
+ /// <seealso cref="IndexReader.DeleteDocuments(Term)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual int DeleteDocuments(Term term)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexReader();
+ return indexReader.DeleteDocuments(term);
+ }
+ }
+
+ /// <summary> Deletes all documents containing <code>term</code>.
+ /// This is useful if one uses a document field to hold a unique ID string for
+ /// the document. Then to delete such a document, one merely constructs a
+ /// term with the appropriate field and the unique ID string as its text and
+ /// passes it to this method. Returns the number of documents deleted.
+ /// </summary>
+ /// <returns> the number of documents deleted
+ /// </returns>
+ /// <seealso cref="IndexReader.DeleteDocuments(Term)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ /// <deprecated> Use {@link #DeleteDocuments(Term)} instead.
+ /// </deprecated>
+ public virtual int Delete(Term term)
+ {
+ return DeleteDocuments(term);
+ }
+
+ /// <summary> Deletes the document numbered <code>docNum</code>.</summary>
+ /// <seealso cref="IndexReader.DeleteDocument(int)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void DeleteDocument(int docNum)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexReader();
+ indexReader.DeleteDocument(docNum);
+ }
+ }
+
+ /// <summary> Deletes the document numbered <code>docNum</code>.</summary>
+ /// <seealso cref="IndexReader.DeleteDocument(int)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ /// <deprecated> Use {@link #DeleteDocument(int)} instead.
+ /// </deprecated>
+ public virtual void Delete(int docNum)
+ {
+ DeleteDocument(docNum);
+ }
+
+ /// <summary> Returns the number of documents currently in this index.</summary>
+ /// <seealso cref="IndexWriter.DocCount()">
+ /// </seealso>
+ /// <seealso cref="IndexReader.NumDocs()">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual int DocCount()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ return indexWriter.DocCount();
+ }
+ else
+ {
+ return indexReader.NumDocs();
+ }
+ }
+ }
+
+ /// <summary> Merges all segments together into a single segment, optimizing an index
+ /// for search.
+ /// </summary>
+ /// <seealso cref="IndexWriter.Optimize()">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void Optimize()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ indexWriter.Optimize();
+ }
+ }
+
+ /// <summary> If non-null, information about merges and a message when
+ /// {@link #GetMaxFieldLength()} is reached will be printed to this.
+ /// <p>Example: <tt>index.setInfoStream(System.err);</tt>
+ /// </summary>
+ /// <seealso cref="IndexWriter.SetInfoStream(PrintStream)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ indexWriter.SetInfoStream(infoStream);
+ }
+ this.infoStream = infoStream;
+ }
+ }
+
+ /// <throws> IOException </throws>
+ /// <seealso cref="IndexModifier.SetInfoStream(PrintStream)">
+ /// </seealso>
+ public virtual System.IO.TextWriter GetInfoStream()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ return indexWriter.GetInfoStream();
+ }
+ }
+
+ /// <summary> Setting to turn on usage of a compound file. When on, multiple files
+ /// for each segment are merged into a single file once the segment creation
+ /// is finished. This is done regardless of what directory is in use.
+ /// </summary>
+ /// <seealso cref="IndexWriter.SetUseCompoundFile(boolean)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void SetUseCompoundFile(bool useCompoundFile)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ indexWriter.SetUseCompoundFile(useCompoundFile);
+ }
+ this.useCompoundFile = useCompoundFile;
+ }
+ }
+
+ /// <throws> IOException </throws>
+ /// <seealso cref="IndexModifier.SetUseCompoundFile(boolean)">
+ /// </seealso>
+ public virtual bool GetUseCompoundFile()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ return indexWriter.GetUseCompoundFile();
+ }
+ }
+
+ /// <summary> The maximum number of terms that will be indexed for a single field in a
+ /// document. This limits the amount of memory required for indexing, so that
+ /// collections with very large files will not crash the indexing process by
+ /// running out of memory.<p/>
+ /// Note that this effectively truncates large documents, excluding from the
+ /// index terms that occur further in the document. If you know your source
 + /// documents are large, be sure to set this value high enough to accommodate
+ /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
+ /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
+ /// By default, no more than 10,000 terms will be indexed for a field.
+ /// </summary>
+ /// <seealso cref="IndexWriter.SetMaxFieldLength(int)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void SetMaxFieldLength(int maxFieldLength)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ indexWriter.SetMaxFieldLength(maxFieldLength);
+ }
+ this.maxFieldLength = maxFieldLength;
+ }
+ }
+
+ /// <throws> IOException </throws>
+ /// <seealso cref="IndexModifier.SetMaxFieldLength(int)">
+ /// </seealso>
+ public virtual int GetMaxFieldLength()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ return indexWriter.GetMaxFieldLength();
+ }
+ }
+
+ /// <summary> The maximum number of terms that will be indexed for a single field in a
+ /// document. This limits the amount of memory required for indexing, so that
+ /// collections with very large files will not crash the indexing process by
+ /// running out of memory.<p/>
+ /// Note that this effectively truncates large documents, excluding from the
+ /// index terms that occur further in the document. If you know your source
 + /// documents are large, be sure to set this value high enough to accommodate
+ /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
+ /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
+ /// By default, no more than 10,000 terms will be indexed for a field.
+ /// </summary>
+ /// <seealso cref="IndexWriter.SetMaxBufferedDocs(int)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
+ }
+ this.maxBufferedDocs = maxBufferedDocs;
+ }
+ }
+
+ /// <throws> IOException </throws>
+ /// <seealso cref="IndexModifier.SetMaxBufferedDocs(int)">
+ /// </seealso>
+ public virtual int GetMaxBufferedDocs()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ return indexWriter.GetMaxBufferedDocs();
+ }
+ }
+
+ /// <summary> Determines how often segment indices are merged by addDocument(). With
+ /// smaller values, less RAM is used while indexing, and searches on
+ /// unoptimized indices are faster, but indexing speed is slower. With larger
+ /// values, more RAM is used during indexing, and while searches on unoptimized
+ /// indices are slower, indexing is faster. Thus larger values (> 10) are best
+ /// for batch index creation, and smaller values (< 10) for indices that are
+ /// interactively maintained.
+ /// <p>This must never be less than 2. The default value is 10.
+ ///
+ /// </summary>
+ /// <seealso cref="IndexWriter.SetMergeFactor(int)">
+ /// </seealso>
+ /// <throws> IllegalStateException if the index is closed </throws>
+ public virtual void SetMergeFactor(int mergeFactor)
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ if (indexWriter != null)
+ {
+ indexWriter.SetMergeFactor(mergeFactor);
+ }
+ this.mergeFactor = mergeFactor;
+ }
+ }
+
+ /// <throws> IOException </throws>
+ /// <seealso cref="IndexModifier.SetMergeFactor(int)">
+ /// </seealso>
+ public virtual int GetMergeFactor()
+ {
+ lock (directory)
+ {
+ AssureOpen();
+ CreateIndexWriter();
+ return indexWriter.GetMergeFactor();
+ }
+ }
+
+ /// <summary> Close this index, writing all pending changes to disk.
+ ///
+ /// </summary>
+ /// <throws> IllegalStateException if the index has been closed before already </throws>
+ public virtual void Close()
+ {
+ lock (directory)
+ {
+ if (!open)
+ throw new System.SystemException("Index is closed already");
+ if (indexWriter != null)
+ {
+ indexWriter.Close();
+ indexWriter = null;
+ }
+ else
+ {
+ indexReader.Close();
+ indexReader = null;
+ }
+ open = false;
+ }
+ }
+
+ public override System.String ToString()
+ {
+ return "Index@" + directory;
+ }
+
+ /*
+ // used as an example in the javadoc:
+ public static void main(String[] args) throws IOException {
+ Analyzer analyzer = new StandardAnalyzer();
+ // create an index in /tmp/index, overwriting an existing one:
+ IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
+ Document doc = new Document();
+ doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
+ indexModifier.addDocument(doc);
+ int deleted = indexModifier.delete(new Term("id", "1"));
+ System.out.println("Deleted " + deleted + " document");
+ indexModifier.flush();
+ System.out.println(indexModifier.docCount() + " docs in index");
+ indexModifier.close();
+ }*/
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexReader.cs Sat Jun 3 19:41:13 2006
@@ -13,13 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Similarity = Lucene.Net.Search.Similarity;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
+using IndexInput = Lucene.Net.Store.IndexInput;
using Lock = Lucene.Net.Store.Lock;
+
namespace Lucene.Net.Index
{
@@ -27,16 +30,19 @@
/// index. Search of an index is done entirely through this abstract interface,
/// so that any subclass which implements it is searchable.
/// <p> Concrete subclasses of IndexReader are usually constructed with a call to
- /// the static method {@link #open}.
+ /// one of the static <code>open()</code> methods, e.g. {@link #Open(String)}.
/// <p> For efficiency, in this API documents are often referred to via
/// <i>document numbers</i>, non-negative integers which each name a unique
/// document in the index. These document numbers are ephemeral--they may change
/// as documents are added to and deleted from an index. Clients should thus not
/// rely on a given document having the same number between sessions.
/// </summary>
+ /// <summary><p> An IndexReader can be opened on a directory for which an IndexWriter is
+ /// opened already, but it cannot be used to delete documents from the index then.
+ /// </summary>
/// <author> Doug Cutting
/// </author>
- /// <version> $Id: IndexReader.java,v 1.32 2004/04/21 16:46:30 goller Exp $
+ /// <version> $Id: IndexReader.java 354917 2005-12-08 00:22:00Z ehatcher $
/// </version>
public abstract class IndexReader
{
@@ -49,7 +55,7 @@
}
private Lucene.Net.Store.Directory directory;
private bool closeDirectory;
- internal AnonymousClassWith(Lucene.Net.Store.Directory directory, bool closeDirectory, Lucene.Net.Store.Lock Param1, long Param2) : base(Param1, Param2)
+ internal AnonymousClassWith(Lucene.Net.Store.Directory directory, bool closeDirectory, Lucene.Net.Store.Lock Param1, long Param2):base(Param1, Param2)
{
InitBlock(directory, closeDirectory);
}
@@ -60,15 +66,12 @@
if (infos.Count == 1)
{
// index is optimized
- return new SegmentReader(infos, infos.Info(0), closeDirectory);
- }
- else
- {
- IndexReader[] readers = new IndexReader[infos.Count];
- for (int i = 0; i < infos.Count; i++)
- readers[i] = new SegmentReader(infos.Info(i));
- return new MultiReader(directory, infos, closeDirectory, readers);
+ return SegmentReader.Get(infos, infos.Info(0), closeDirectory);
}
+ IndexReader[] readers = new IndexReader[infos.Count];
+ for (int i = 0; i < infos.Count; i++)
+ readers[i] = SegmentReader.Get(infos.Info(i));
+ return new MultiReader(directory, infos, closeDirectory, readers);
}
}
private class AnonymousClassWith1 : Lock.With
@@ -98,6 +101,40 @@
}
}
+ public sealed class FieldOption
+ {
+ private System.String option;
+ internal FieldOption()
+ {
+ }
+ internal FieldOption(System.String option)
+ {
+ this.option = option;
+ }
+ public override System.String ToString()
+ {
+ return this.option;
+ }
+ // all fields
+ public static readonly FieldOption ALL = new FieldOption("ALL");
+ // all indexed fields
+ public static readonly FieldOption INDEXED = new FieldOption("INDEXED");
+ // all fields which are not indexed
+ public static readonly FieldOption UNINDEXED = new FieldOption("UNINDEXED");
 + // all fields which are indexed with termvectors enabled
+ public static readonly FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption("INDEXED_WITH_TERMVECTOR");
+ // all fields which are indexed but don't have termvectors enabled
+ public static readonly FieldOption INDEXED_NO_TERMVECTOR = new FieldOption("INDEXED_NO_TERMVECTOR");
+ // all fields where termvectors are enabled. Please note that only standard termvector fields are returned
+ public static readonly FieldOption TERMVECTOR = new FieldOption("TERMVECTOR");
 + // all fields with termvectors with positions enabled
+ public static readonly FieldOption TERMVECTOR_WITH_POSITION = new FieldOption("TERMVECTOR_WITH_POSITION");
+ // all fields where termvectors with offset position are set
+ public static readonly FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption("TERMVECTOR_WITH_OFFSET");
+ // all fields where termvectors with offset and position values set
+ public static readonly FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET");
+ }
+
/// <summary> Constructor used if IndexReader is not owner of its directory.
/// This is used for IndexReaders that are used within other IndexReaders that take care or locking directories.
///
@@ -107,12 +144,6 @@
protected internal IndexReader(Directory directory)
{
this.directory = directory;
- segmentInfos = null;
- directoryOwner = false;
- closeDirectory = false;
- stale = false;
- hasChanges = false;
- writeLock = null;
}
/// <summary> Constructor used if IndexReader is owner of its directory.
@@ -123,28 +154,30 @@
/// </param>
 /// <param name="segmentInfos">Used for write-lock
/// </param>
- /// <param name="">closeDirectory
+ /// <param name="closeDirectory">
/// </param>
internal IndexReader(Directory directory, SegmentInfos segmentInfos, bool closeDirectory)
{
+ Init(directory, segmentInfos, closeDirectory, true);
+ }
+
+ internal virtual void Init(Directory directory, SegmentInfos segmentInfos, bool closeDirectory, bool directoryOwner)
+ {
this.directory = directory;
this.segmentInfos = segmentInfos;
- directoryOwner = true;
+ this.directoryOwner = directoryOwner;
this.closeDirectory = closeDirectory;
- stale = false;
- hasChanges = false;
- writeLock = null;
}
private Directory directory;
-
private bool directoryOwner;
+ private bool closeDirectory;
+
private SegmentInfos segmentInfos;
private Lock writeLock;
private bool stale;
private bool hasChanges;
- private bool closeDirectory;
/// <summary>Returns an IndexReader reading the index in an FSDirectory in the named
/// path.
@@ -183,59 +216,36 @@
return directory;
}
- /// <summary> Returns the time the index in the named directory was last modified.
- ///
- /// <p>Synchronization of IndexReader and IndexWriter instances is
- /// no longer done via time stamps of the segments file since the time resolution
- /// depends on the hardware platform. Instead, a version number is maintained
- /// within the segments file, which is incremented everytime when the index is
- /// changed.</p>
- ///
+ /// <summary> Returns the time the index in the named directory was last modified.
+ /// Do not use this to check whether the reader is still up-to-date, use
+ /// {@link #IsCurrent()} instead.
/// </summary>
- /// <deprecated> Replaced by {@link #GetCurrentVersion(String)}
- ///
- /// </deprecated>
public static long LastModified(System.String directory)
{
return LastModified(new System.IO.FileInfo(directory));
}
/// <summary> Returns the time the index in the named directory was last modified.
- ///
- /// <p>Synchronization of IndexReader and IndexWriter instances is
- /// no longer done via time stamps of the segments file since the time resolution
- /// depends on the hardware platform. Instead, a version number is maintained
- /// within the segments file, which is incremented everytime when the index is
- /// changed.</p>
- ///
+ /// Do not use this to check whether the reader is still up-to-date, use
+ /// {@link #IsCurrent()} instead.
/// </summary>
- /// <deprecated> Replaced by {@link #GetCurrentVersion(File)}
- ///
- /// </deprecated>
public static long LastModified(System.IO.FileInfo directory)
{
- return FSDirectory.FileModified(directory, "segments");
+ return FSDirectory.FileModified(directory, IndexFileNames.SEGMENTS);
}
/// <summary> Returns the time the index in the named directory was last modified.
- ///
- /// <p>Synchronization of IndexReader and IndexWriter instances is
- /// no longer done via time stamps of the segments file since the time resolution
- /// depends on the hardware platform. Instead, a version number is maintained
- /// within the segments file, which is incremented everytime when the index is
- /// changed.</p>
- ///
+ /// Do not use this to check whether the reader is still up-to-date, use
+ /// {@link #IsCurrent()} instead.
/// </summary>
- /// <deprecated> Replaced by {@link #GetCurrentVersion(Directory)}
- ///
- /// </deprecated>
public static long LastModified(Directory directory)
{
- return directory.FileModified("segments");
+ return directory.FileModified(IndexFileNames.SEGMENTS);
}
- /// <summary> Reads version number from segments files. The version number counts the
- /// number of changes of the index.
+ /// <summary> Reads version number from segments files. The version number is
+ /// initialized with a timestamp and then increased by one for each change of
+ /// the index.
///
/// </summary>
/// <param name="directory">where the index resides.
@@ -248,8 +258,9 @@
return GetCurrentVersion(new System.IO.FileInfo(directory));
}
- /// <summary> Reads version number from segments files. The version number counts the
- /// number of changes of the index.
+ /// <summary> Reads version number from segments files. The version number is
+ /// initialized with a timestamp and then increased by one for each change of
+ /// the index.
///
/// </summary>
/// <param name="directory">where the index resides.
@@ -265,8 +276,9 @@
return version;
}
- /// <summary> Reads version number from segments files. The version number counts the
- /// number of changes of the index.
+ /// <summary> Reads version number from segments files. The version number is
+ /// initialized with a timestamp and then increased by one for each change of
+ /// the index.
///
/// </summary>
/// <param name="directory">where the index resides.
@@ -279,24 +291,62 @@
return SegmentInfos.ReadCurrentVersion(directory);
}
- /// <summary>Return an array of term frequency vectors for the specified document.
- /// The array contains a vector for each vectorized Field in the document.
- /// Each vector contains terms and frequencies for all terms
- /// in a given vectorized Field.
- /// If no such fields existed, the method returns null.
+ /// <summary> Version number when this IndexReader was opened.</summary>
+ public virtual long GetVersion()
+ {
+ return segmentInfos.GetVersion();
+ }
+
+ /// <summary> Check whether this IndexReader still works on a current version of the index.
+ /// If this is not the case you will need to re-open the IndexReader to
+ /// make sure you see the latest changes made to the index.
///
/// </summary>
- /// <seealso cref="Field#IsTermVectorStored()">
+ /// <throws> IOException </throws>
+ public virtual bool IsCurrent()
+ {
+ if (SegmentInfos.ReadCurrentVersion(directory) != segmentInfos.GetVersion())
+ {
+ return false;
+ }
+ return true;
+ }
+
+ /// <summary> Return an array of term frequency vectors for the specified document.
+ /// The array contains a vector for each vectorized field in the document.
+ /// Each vector contains terms and frequencies for all terms in a given vectorized field.
+ /// If no such fields existed, the method returns null. The term vectors that are
+ /// returned may either be of type TermFreqVector or of type TermPositionsVector if
+ /// positions or offsets have been stored.
+ ///
+ /// </summary>
+ /// <param name="docNumber">document for which term frequency vectors are returned
+ /// </param>
+ /// <returns> array of term frequency vectors. May be null if no term vectors have been
+ /// stored for the specified document.
+ /// </returns>
+ /// <throws> IOException if index cannot be accessed </throws>
+ /// <seealso cref="Lucene.Net.Documents.Field.TermVector">
/// </seealso>
abstract public TermFreqVector[] GetTermFreqVectors(int docNumber);
- /// <summary>Return a term frequency vector for the specified document and Field. The
- /// vector returned contains terms and frequencies for those terms in
- /// the specified Field of this document, if the Field had storeTermVector
- /// flag set. If the flag was not set, the method returns null.
+
+ /// <summary> Return a term frequency vector for the specified document and field. The
+ /// returned vector contains terms and frequencies for the terms in
+ /// the specified field of this document, if the field had the storeTermVector
+ /// flag set. If termvectors had been stored with positions or offsets, a
+ /// TermPositionsVector is returned.
///
/// </summary>
- /// <seealso cref="Field#IsTermVectorStored()">
+ /// <param name="docNumber">document for which the term frequency vector is returned
+ /// </param>
+ /// <param name="field">field for which the term frequency vector is returned.
+ /// </param>
+ /// <returns> term frequency vector May be null if field does not exist in the specified
+ /// document or term vector was not stored.
+ /// </returns>
+ /// <throws> IOException if index cannot be accessed </throws>
+ /// <seealso cref="Lucene.Net.Documents.Field.TermVector">
/// </seealso>
abstract public TermFreqVector GetTermFreqVector(int docNumber, System.String field);
@@ -311,10 +361,10 @@
public static bool IndexExists(System.String directory)
{
bool tmpBool;
- if (System.IO.File.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory, "segments"))).FullName))
+ if (System.IO.File.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory, IndexFileNames.SEGMENTS))).FullName))
tmpBool = true;
else
- tmpBool = System.IO.Directory.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory, "segments"))).FullName);
+ tmpBool = System.IO.Directory.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory, IndexFileNames.SEGMENTS))).FullName);
return tmpBool;
}
@@ -328,10 +378,10 @@
public static bool IndexExists(System.IO.FileInfo directory)
{
bool tmpBool;
- if (System.IO.File.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, "segments"))).FullName))
+ if (System.IO.File.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, IndexFileNames.SEGMENTS))).FullName))
tmpBool = true;
else
- tmpBool = System.IO.Directory.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, "segments"))).FullName);
+ tmpBool = System.IO.Directory.Exists((new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, IndexFileNames.SEGMENTS))).FullName);
return tmpBool;
}
@@ -345,7 +395,7 @@
/// <throws> IOException if there is a problem with accessing the index </throws>
public static bool IndexExists(Directory directory)
{
- return directory.FileExists("segments");
+ return directory.FileExists(IndexFileNames.SEGMENTS);
}
/// <summary>Returns the number of documents in this index. </summary>
@@ -368,32 +418,40 @@
/// <summary>Returns true if any documents have been deleted </summary>
public abstract bool HasDeletions();
- /// <summary>Returns the byte-encoded normalization factor for the named Field of
+ /// <summary>Returns true if there are norms stored for this field. </summary>
+ public virtual bool HasNorms(System.String field)
+ {
+ // backward compatible implementation.
+ // SegmentReader has an efficient implementation.
+ return Norms(field) != null;
+ }
+
+ /// <summary>Returns the byte-encoded normalization factor for the named field of
/// every document. This is used by the search code to score documents.
///
/// </summary>
- /// <seealso cref="Field#SetBoost(float)">
+ /// <seealso cref="Field.SetBoost(float)">
/// </seealso>
public abstract byte[] Norms(System.String field);
- /// <summary>Reads the byte-encoded normalization factor for the named Field of every
+ /// <summary>Reads the byte-encoded normalization factor for the named field of every
/// document. This is used by the search code to score documents.
///
/// </summary>
- /// <seealso cref="Field#SetBoost(float)">
+ /// <seealso cref="Field.SetBoost(float)">
/// </seealso>
public abstract void Norms(System.String field, byte[] bytes, int offset);
- /// <summary>Expert: Resets the normalization factor for the named Field of the named
- /// document. The norm represents the product of the Field's {@link
+ /// <summary>Expert: Resets the normalization factor for the named field of the named
+ /// document. The norm represents the product of the field's {@link
/// Field#SetBoost(float) boost} and its {@link Similarity#LengthNorm(String,
/// int) length normalization}. Thus, to preserve the length normalization
/// values when resetting this, one should base the new value upon the old.
///
/// </summary>
- /// <seealso cref="#Norms(String)">
+ /// <seealso cref="Norms(String)">
/// </seealso>
- /// <seealso cref="Similarity#DecodeNorm(byte)">
+ /// <seealso cref="Similarity.DecodeNorm(byte)">
/// </seealso>
public void SetNorm(int doc, System.String field, byte value_Renamed)
{
@@ -409,20 +467,19 @@
/// <summary>Implements setNorm in subclass.</summary>
protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed);
- /// <summary>Expert: Resets the normalization factor for the named Field of the named
+ /// <summary>Expert: Resets the normalization factor for the named field of the named
/// document.
///
/// </summary>
- /// <seealso cref="#Norms(String)">
+ /// <seealso cref="Norms(String)">
/// </seealso>
- /// <seealso cref="Similarity#DecodeNorm(byte)">
+ /// <seealso cref="Similarity.DecodeNorm(byte)">
/// </seealso>
public virtual void SetNorm(int doc, System.String field, float value_Renamed)
{
SetNorm(doc, field, Similarity.EncodeNorm(value_Renamed));
}
-
/// <summary>Returns an enumeration of all the terms in the index.
/// The enumeration is ordered by Term.compareTo(). Each term
/// is greater than all that precede it in the enumeration.
@@ -463,6 +520,7 @@
/// and frequency of the term in that document, a list of all of the ordinal
/// positions of the term in the document is available. Thus, this method
/// implements the mapping:
+ ///
/// <p><ul>
/// Term => <docNum, freq,
/// <pos<sub>1</sub>, pos<sub>2</sub>, ...
@@ -483,7 +541,7 @@
/// <summary>Returns an unpositioned {@link TermPositions} enumerator. </summary>
public abstract TermPositions TermPositions();
- /// <summary> Trys to acquire the WriteLock on this directory.
+ /// <summary> Tries to acquire the WriteLock on this directory.
/// this method is only valid if this IndexReader is directory owner.
///
/// </summary>
@@ -496,7 +554,7 @@
if (this.writeLock == null)
{
Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
- if (!writeLock.Obtain(IndexWriter.WRITE_LOCK_TIMEOUT))
+ if (!writeLock.obtain(IndexWriter.WRITE_LOCK_TIMEOUT))
// obtain write lock
{
throw new System.IO.IOException("Index locked for write: " + writeLock);
@@ -517,15 +575,34 @@
/// <summary>Deletes the document numbered <code>docNum</code>. Once a document is
/// deleted it will not appear in TermDocs or TermPostitions enumerations.
- /// Attempts to read its Field with the {@link #document}
+ /// Attempts to read its field with the {@link #document}
/// method will result in an error. The presence of this document may still be
/// reflected in the {@link #docFreq} statistic, though
/// this will be corrected eventually as the index is further modified.
+ ///
/// </summary>
+ /// <deprecated> Use {@link #DeleteDocument(int)} instead.
+ /// </deprecated>
public void Delete(int docNum)
{
lock (this)
{
+ DeleteDocument(docNum);
+ }
+ }
+
+ /// <summary>Deletes the document numbered <code>docNum</code>. Once a document is
+ /// deleted it will not appear in TermDocs or TermPositions enumerations.
+ /// Attempts to read its field with the {@link #document}
+ /// method will result in an error. The presence of this document may still be
+ /// reflected in the {@link #docFreq} statistic, though
+ /// this will be corrected eventually as the index is further modified.
+ /// </summary>
+ //UPGRADE_NOTE: Synchronized keyword was removed from method 'DeleteDocument'. Lock expression was added. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1027'"
+ public void DeleteDocument(int docNum)
+ {
+ lock (this)
+ {
if (directoryOwner)
AquireWriteLock();
DoDelete(docNum);
@@ -533,19 +610,42 @@
}
}
+
/// <summary>Implements deletion of the document numbered <code>docNum</code>.
/// Applications should call {@link #Delete(int)} or {@link #Delete(Term)}.
/// </summary>
protected internal abstract void DoDelete(int docNum);
/// <summary>Deletes all documents containing <code>term</code>.
- /// This is useful if one uses a document Field to hold a unique ID string for
+ /// This is useful if one uses a document field to hold a unique ID string for
/// the document. Then to delete such a document, one merely constructs a
- /// term with the appropriate Field and the unique ID string as its text and
- /// passes it to this method. Returns the number of documents deleted.
+ /// term with the appropriate field and the unique ID string as its text and
+ /// passes it to this method.
+ /// See {@link #Delete(int)} for information about when this deletion will
+ /// become effective.
/// </summary>
+ /// <returns> the number of documents deleted
+ ///
+ /// </returns>
+ /// <deprecated> Use {@link #DeleteDocuments(Term)} instead.
+ /// </deprecated>
public int Delete(Term term)
{
+ return DeleteDocuments(term);
+ }
+
+ /// <summary>Deletes all documents containing <code>term</code>.
+ /// This is useful if one uses a document field to hold a unique ID string for
+ /// the document. Then to delete such a document, one merely constructs a
+ /// term with the appropriate field and the unique ID string as its text and
+ /// passes it to this method.
+ /// See {@link #Delete(int)} for information about when this deletion will
+ /// become effective.
+ /// </summary>
+ /// <returns> the number of documents deleted
+ /// </returns>
+ public int DeleteDocuments(Term term)
+ {
TermDocs docs = TermDocs(term);
if (docs == null)
return 0;
@@ -554,7 +654,7 @@
{
while (docs.Next())
{
- Delete(docs.Doc());
+ DeleteDocument(docs.Doc());
n++;
}
}
@@ -619,14 +719,14 @@
/// </summary>
public void Close()
{
- lock (this)
- {
- Commit();
- DoClose();
- if (closeDirectory)
- directory.Close();
+ lock (this)
+ {
+ Commit();
+ DoClose();
+ if (closeDirectory)
+ directory.Close();
System.GC.SuppressFinalize(this);
- }
+ }
}
/// <summary>Implements close. </summary>
@@ -642,15 +742,19 @@
}
}
- /// <summary> Returns a list of all unique Field names that exist in the index pointed
+ /// <summary> Returns a list of all unique field names that exist in the index pointed
/// to by this IndexReader.
/// </summary>
/// <returns> Collection of Strings indicating the names of the fields
/// </returns>
/// <throws> IOException if there is a problem with accessing the index </throws>
+ /// <summary>
+ /// </summary>
+ /// <deprecated> Replaced by {@link #GetFieldNames(IndexReader.FieldOption)}
+ /// </deprecated>
public abstract System.Collections.ICollection GetFieldNames();
- /// <summary> Returns a list of all unique Field names that exist in the index pointed
+ /// <summary> Returns a list of all unique field names that exist in the index pointed
/// to by this IndexReader. The boolean argument specifies whether the fields
/// returned are indexed or not.
/// </summary>
@@ -660,6 +764,10 @@
/// <returns> Collection of Strings indicating the names of the fields
/// </returns>
/// <throws> IOException if there is a problem with accessing the index </throws>
+ /// <summary>
+ /// </summary>
+ /// <deprecated> Replaced by {@link #GetFieldNames(IndexReader.FieldOption)}
+ /// </deprecated>
public abstract System.Collections.ICollection GetFieldNames(bool indexed);
/// <summary> </summary>
@@ -667,8 +775,72 @@
/// else only indexed fields without term vector info
/// </param>
/// <returns> Collection of Strings indicating the names of the fields
+ ///
/// </returns>
- public abstract System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector);
+ /// <deprecated> Replaced by {@link #GetFieldNames(IndexReader.FieldOption)}
+ /// </deprecated>
+ public virtual System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
+ {
+ if (storedTermVector)
+ {
+ System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
+ foreach (object item in GetIndexedFieldNames(Field.TermVector.YES))
+ {
+ if (fieldSet.ContainsKey(item) == false)
+ {
+ fieldSet.Add(item, item);
+ }
+ }
+ foreach (object item in GetIndexedFieldNames(Field.TermVector.WITH_POSITIONS))
+ {
+ if (fieldSet.ContainsKey(item) == false)
+ {
+ fieldSet.Add(item, item);
+ }
+ }
+ foreach (object item in GetIndexedFieldNames(Field.TermVector.WITH_OFFSETS))
+ {
+ if (fieldSet.ContainsKey(item) == false)
+ {
+ fieldSet.Add(item, item);
+ }
+ }
+ foreach (object item in GetIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS))
+ {
+ if (fieldSet.ContainsKey(item) == false)
+ {
+ fieldSet.Add(item, item);
+ }
+ }
+ return fieldSet;
+ }
+ else
+ return GetIndexedFieldNames(Field.TermVector.NO);
+ }
+
+ /// <summary> Get a list of unique field names that exist in this index, are indexed, and have
+ /// the specified term vector information.
+ ///
+ /// </summary>
+ /// <param name="tvSpec">specifies which term vector information should be available for the fields
+ /// </param>
+ /// <returns> Collection of Strings indicating the names of the fields
+ ///
+ /// </returns>
+ /// <deprecated> Replaced by {@link #GetFieldNames(IndexReader.FieldOption)}
+ /// </deprecated>
+ public abstract System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec);
+
+ /// <summary> Get a list of unique field names that exist in this index and have the specified
+ /// field option information.
+ /// </summary>
+ /// <param name="fldOption">specifies which field option should be available for the returned fields
+ /// </param>
+ /// <returns> Collection of Strings indicating the names of the fields.
+ /// </returns>
+ /// <seealso cref="IndexReader.FieldOption">
+ /// </seealso>
+ public abstract System.Collections.ICollection GetFieldNames(FieldOption fldOption);
/// <summary> Returns <code>true</code> iff the index in the named directory is
/// currently locked.
@@ -705,6 +877,106 @@
{
directory.MakeLock(IndexWriter.WRITE_LOCK_NAME).Release();
directory.MakeLock(IndexWriter.COMMIT_LOCK_NAME).Release();
+ }
+
+ /// <summary> Prints the filename and size of each file within a given compound file.
+ /// Add the -extract flag to extract files to the current working directory.
+ /// In order to make the extracted version of the index work, you have to copy
+ /// the segments file from the compound index into the directory where the extracted files are stored.
+ /// </summary>
+ /// <param name="args">Usage: Lucene.Net.index.IndexReader [-extract] <cfsfile>
+ /// </param>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+ System.String filename = null;
+ bool extract = false;
+
+ for (int i = 0; i < args.Length; ++i)
+ {
+ if (args[i].Equals("-extract"))
+ {
+ extract = true;
+ }
+ else if (filename == null)
+ {
+ filename = args[i];
+ }
+ }
+
+ if (filename == null)
+ {
+ System.Console.Out.WriteLine("Usage: Lucene.Net.index.IndexReader [-extract] <cfsfile>");
+ return ;
+ }
+
+ Directory dir = null;
+ CompoundFileReader cfr = null;
+
+ try
+ {
+ System.IO.FileInfo file = new System.IO.FileInfo(filename);
+ System.String dirname = new System.IO.FileInfo(file.FullName).DirectoryName;
+ filename = file.Name;
+ dir = FSDirectory.GetDirectory(dirname, false);
+ cfr = new CompoundFileReader(dir, filename);
+
+ System.String[] files = cfr.List();
+ System.Array.Sort(files); // sort the array of filename so that the output is more readable
+
+ for (int i = 0; i < files.Length; ++i)
+ {
+ long len = cfr.FileLength(files[i]);
+
+ if (extract)
+ {
+ System.Console.Out.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
+ IndexInput ii = cfr.OpenInput(files[i]);
+
+ System.IO.FileStream f = new System.IO.FileStream(files[i], System.IO.FileMode.Create);
+
+ // read and write with a small buffer, which is more effective than reading byte by byte
+ byte[] buffer = new byte[1024];
+ int chunk = buffer.Length;
+ while (len > 0)
+ {
+ int bufLen = (int) System.Math.Min(chunk, len);
+ ii.ReadBytes(buffer, 0, bufLen);
+
+ byte[] byteArray = new byte[buffer.Length];
+ for (int index=0; index < buffer.Length; index++)
+ byteArray[index] = (byte) buffer[index];
+
+ f.Write(byteArray, 0, bufLen);
+
+ len -= bufLen;
+ }
+
+ f.Close();
+ ii.Close();
+ }
+ else
+ System.Console.Out.WriteLine(files[i] + ": " + len + " bytes");
+ }
+ }
+ catch (System.IO.IOException ioe)
+ {
+ System.Console.Error.WriteLine(ioe.StackTrace);
+ }
+ finally
+ {
+ try
+ {
+ if (dir != null)
+ dir.Close();
+ if (cfr != null)
+ cfr.Close();
+ }
+ catch (System.IO.IOException ioe)
+ {
+ System.Console.Error.WriteLine(ioe.StackTrace);
+ }
+ }
}
}
}