You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2006/06/04 04:41:25 UTC
svn commit: r411501 [8/30] - in /incubator/lucene.net/trunk/C#/src: ./
Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/
Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/
Lucene.Net/Analysis/Standard/ Lucene.Net/Docum...
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/DateTools.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Document/DateTools.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/DateTools.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/DateTools.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,339 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders
+ /// them by date, which makes them suitable for use as field values
+ /// and search terms.
+ ///
+ /// <P>This class also helps you to limit the resolution of your dates. Do not
+ /// save dates with a finer resolution than you really need, as then
+ /// RangeQuery and PrefixQuery will require more memory and become slower.
+ ///
+ /// <P>Compared to {@link DateField} the strings generated by the methods
+ /// in this class take slightly more space, unless your selected resolution
+ /// is set to <code>Resolution.DAY</code> or lower.
+ /// </summary>
+ public class DateTools
+ {
+
+ private DateTools()
+ {
+ }
+
+ /// <summary> Converts a Date to a string suitable for indexing.
+ ///
+ /// </summary>
+ /// <param name="date">the date to be converted
+ /// </param>
+ /// <param name="resolution">the desired resolution, see
+ /// {@link #Round(Date, DateTools.Resolution)}
+ /// </param>
+ /// <returns> a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
+ /// depeding on <code>resolution</code>
+ /// </returns>
+ public static System.String DateToString(System.DateTime date, Resolution resolution)
+ {
+ return TimeToString(date.Ticks, resolution);
+ }
+
+ /// <summary> Converts a millisecond time to a string suitable for indexing.
+ ///
+ /// </summary>
+ /// <param name="time">the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT
+ /// </param>
+ /// <param name="resolution">the desired resolution, see
+ /// {@link #Round(long, DateTools.Resolution)}
+ /// </param>
+ /// <returns> a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
+ /// depeding on <code>resolution</code>
+ /// </returns>
+ public static System.String TimeToString(long time, Resolution resolution)
+ {
+ System.Globalization.Calendar cal = new System.Globalization.GregorianCalendar(); // {{Aroush}} do we care about 'cal'
+
+ //protected in JDK's prior to 1.4
+ //cal.setTimeInMillis(round(time, resolution));
+
+ System.DateTime dt = new System.DateTime(Round(time, resolution));
+
+ System.String t = "";
+
+ if (resolution == Resolution.YEAR)
+ {
+ t = dt.ToString("yyyy");
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ t = dt.ToString("yyyyMM");
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ t = dt.ToString("yyyyMMdd");
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ t = dt.ToString("yyyyMMddHH");
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ t = dt.ToString("yyyyMMddHHmm");
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ t = dt.ToString("yyyyMMddHHmmss");
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ t = dt.ToString("yyyyMMddHHmmssfff");
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+
+ return t;
+ }
+
+ /// <summary> Converts a string produced by <code>timeToString</code> or
+ /// <code>DateToString</code> back to a time, represented as the
+ /// number of milliseconds since January 1, 1970, 00:00:00 GMT.
+ ///
+ /// </summary>
+ /// <param name="dateString">the date string to be converted
+ /// </param>
+ /// <returns> the number of milliseconds since January 1, 1970, 00:00:00 GMT
+ /// </returns>
+ /// <throws> ParseException if <code>dateString</code> is not in the </throws>
+ /// <summary> expected format
+ /// </summary>
+ public static long StringToTime(System.String dateString)
+ {
+ return StringToDate(dateString).Ticks;
+ }
+
+ /// <summary> Converts a string produced by <code>timeToString</code> or
+ /// <code>DateToString</code> back to a time, represented as a
+ /// Date object.
+ ///
+ /// </summary>
+ /// <param name="dateString">the date string to be converted
+ /// </param>
+ /// <returns> the parsed time as a Date object
+ /// </returns>
+ /// <throws> ParseException if <code>dateString</code> is not in the </throws>
+ /// <summary> expected format
+ /// </summary>
+ public static System.DateTime StringToDate(System.String dateString)
+ {
+ System.String yyyy = "1";
+ System.String MM = "1";
+ System.String dd = "1";
+ System.String HH = "0";
+ System.String mm = "0";
+ System.String ss = "0";
+ System.String SSS = "0";
+
+ if (dateString.Length == 4) // "yyyy"
+ {
+ yyyy = dateString.Substring(0, 4);
+ }
+ else if (dateString.Length == 6) // "yyyyMM";
+ {
+ yyyy = dateString.Substring(0, 4);
+ MM = dateString.Substring(4, 2);
+ }
+ else if (dateString.Length == 8) // "yyyyMMdd"
+ {
+ yyyy = dateString.Substring(0, 4);
+ MM = dateString.Substring(4, 2);
+ dd = dateString.Substring(6, 2);
+ }
+ else if (dateString.Length == 10) // "yyyyMMddHH"
+ {
+ yyyy = dateString.Substring(0, 4);
+ MM = dateString.Substring(4, 2);
+ dd = dateString.Substring(6, 2);
+ HH = dateString.Substring(8, 2);
+ }
+ else if (dateString.Length == 12) // "yyyyMMddHHmm";
+ {
+ yyyy = dateString.Substring(0, 4);
+ MM = dateString.Substring(4, 2);
+ dd = dateString.Substring(6, 2);
+ HH = dateString.Substring(8, 2);
+ mm = dateString.Substring(10, 2);
+ }
+ else if (dateString.Length == 14) // "yyyyMMddHHmmss";
+ {
+ yyyy = dateString.Substring(0, 4);
+ MM = dateString.Substring(4, 2);
+ dd = dateString.Substring(6, 2);
+ HH = dateString.Substring(8, 2);
+ mm = dateString.Substring(10, 2);
+ ss = dateString.Substring(12, 2);
+ }
+ else if (dateString.Length == 17) // "yyyyMMddHHmmssSSS";
+ {
+ yyyy = dateString.Substring(0, 4);
+ MM = dateString.Substring(4, 2);
+ dd = dateString.Substring(6, 2);
+ HH = dateString.Substring(8, 2);
+ mm = dateString.Substring(10, 2);
+ ss = dateString.Substring(12, 2);
+ SSS = dateString.Substring(14, 3);
+ }
+ else
+ {
+ throw new System.FormatException("Input is not valid date string: " + dateString);
+ }
+
+ int y, M, d, H, m, s, S;
+ y = Convert.ToInt16(yyyy);
+ M = Convert.ToInt16(MM);
+ d = Convert.ToInt16(dd);
+ H = Convert.ToInt16(HH);
+ m = Convert.ToInt16(mm);
+ s = Convert.ToInt16(ss);
+ S = Convert.ToInt16(SSS);
+
+ return new System.DateTime(y,
+ M, d, H,
+ m, s, S);
+
+ //return new System.DateTime(Convert.ToInt16(yyyy),
+ // Convert.ToInt16(MM), Convert.ToInt16(dd), Convert.ToInt16(HH),
+ // Convert.ToInt16(mm), Convert.ToInt16(ss), Convert.ToInt16(SSS));
+ }
+
+ /// <summary> Limit a date's resolution. For example, the date <code>2004-09-21 13:50:11</code>
+ /// will be changed to <code>2004-09-01 00:00:00</code> when using
+ /// <code>Resolution.MONTH</code>.
+ ///
+ /// </summary>
+ /// <param name="resolution">The desired resolution of the date to be returned
+ /// </param>
+ /// <returns> the date with all values more precise than <code>resolution</code>
+ /// set to 0 or 1
+ /// </returns>
+ public static System.DateTime Round(System.DateTime date, Resolution resolution)
+ {
+ return new System.DateTime(Round(date.Ticks, resolution));
+ }
+
+ /// <summary> Limit a date's resolution. For example, the date <code>1095767411000</code>
+ /// (which represents 2004-09-21 13:50:11) will be changed to
+ /// <code>1093989600000</code> (2004-09-01 00:00:00) when using
+ /// <code>Resolution.MONTH</code>.
+ ///
+ /// </summary>
+ /// <param name="resolution">The desired resolution of the date to be returned
+ /// </param>
+ /// <returns> the date with all values more precise than <code>resolution</code>
+ /// set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
+ /// </returns>
+ public static long Round(long time, Resolution resolution)
+ {
+ System.Globalization.Calendar cal = new System.Globalization.GregorianCalendar(); // {{Aroush}} do we care about 'cal'
+
+ // protected in JDK's prior to 1.4
+ //cal.setTimeInMillis(time);
+
+ System.DateTime dt = new System.DateTime(time);
+
+ if (resolution == Resolution.YEAR)
+ {
+ dt = dt.AddMonths(1 - dt.Month);
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ // don't cut off anything
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+ return dt.Ticks;
+ }
+
+ public class Resolution
+ {
+ public static readonly Resolution YEAR = new Resolution("year");
+ public static readonly Resolution MONTH = new Resolution("month");
+ public static readonly Resolution DAY = new Resolution("day");
+ public static readonly Resolution HOUR = new Resolution("hour");
+ public static readonly Resolution MINUTE = new Resolution("minute");
+ public static readonly Resolution SECOND = new Resolution("second");
+ public static readonly Resolution MILLISECOND = new Resolution("millisecond");
+
+ private System.String resolution;
+
+ internal Resolution()
+ {
+ }
+
+ internal Resolution(System.String resolution)
+ {
+ this.resolution = resolution;
+ }
+
+ public override System.String ToString()
+ {
+ return resolution;
+ }
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/Document.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Document/Document.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/Document.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/Document.cs Sat Jun 3 19:41:13 2006
@@ -13,17 +13,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using IndexReader = Lucene.Net.Index.IndexReader;
using Hits = Lucene.Net.Search.Hits;
using Searcher = Lucene.Net.Search.Searcher;
+
namespace Lucene.Net.Documents
{
/// <summary>Documents are the unit of indexing and search.
///
- /// A Document is a set of fields. Each Field has a name and a textual value.
- /// A Field may be {@link Field#IsStored() stored} with the document, in which
+ /// A Document is a set of fields. Each field has a name and a textual value.
+ /// A field may be {@link Field#IsStored() stored} with the document, in which
/// case it is returned with search hits on the document. Thus each document
/// should typically contain one or more stored fields which uniquely identify
/// it.
@@ -37,7 +39,7 @@
[Serializable]
public sealed class Document
{
- public System.Collections.IList fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
+ internal System.Collections.IList fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
private float boost = 1.0f;
/// <summary>Constructs a new document with no fields. </summary>
@@ -46,22 +48,22 @@
}
- /// <summary>Sets a boost factor for hits on any Field of this document. This value
+ /// <summary>Sets a boost factor for hits on any field of this document. This value
/// will be multiplied into the score of all hits on this document.
///
/// <p>Values are multiplied into the value of {@link Field#GetBoost()} of
- /// each Field in this document. Thus, this method in effect sets a default
+ /// each field in this document. Thus, this method in effect sets a default
/// boost for the fields of this document.
///
/// </summary>
- /// <seealso cref="Field#SetBoost(float)">
+ /// <seealso cref="Field.SetBoost(float)">
/// </seealso>
public void SetBoost(float boost)
{
this.boost = boost;
}
- /// <summary>Returns the boost factor for hits on any Field of this document.
+ /// <summary>Returns the boost factor for hits on any field of this document.
///
/// <p>The default value is 1.0.
///
@@ -71,14 +73,14 @@
/// this document was indexed.
///
/// </summary>
- /// <seealso cref="#SetBoost(float)">
+ /// <seealso cref="SetBoost(float)">
/// </seealso>
public float GetBoost()
{
return boost;
}
- /// <summary> <p>Adds a Field to a document. Several fields may be added with
+ /// <summary> <p>Adds a field to a document. Several fields may be added with
/// the same name. In this case, if the fields are indexed, their text is
/// treated as though appended for the purposes of search.</p>
/// <p> Note that add like the removeField(s) methods only makes sense
@@ -92,9 +94,9 @@
fields.Add(field);
}
- /// <summary> <p>Removes Field with the specified name from the document.
- /// If multiple fields exist with this name, this method removes the first Field that has been added.
- /// If there is no Field with the specified name, the document remains unchanged.</p>
+ /// <summary> <p>Removes field with the specified name from the document.
+ /// If multiple fields exist with this name, this method removes the first field that has been added.
+ /// If there is no field with the specified name, the document remains unchanged.</p>
/// <p> Note that the removeField(s) methods like the add method only make sense
/// prior to adding a document to an index. These methods cannot
/// be used to change the content of an existing index! In order to achieve this,
@@ -116,7 +118,7 @@
}
/// <summary> <p>Removes all fields with the given name from the document.
- /// If there is no Field with the specified name, the document remains unchanged.</p>
+ /// If there is no field with the specified name, the document remains unchanged.</p>
/// <p> Note that the removeField(s) methods like the add method only make sense
/// prior to adding a document to an index. These methods cannot
/// be used to change the content of an existing index! In order to achieve this,
@@ -135,7 +137,7 @@
}
}
- /// <summary>Returns a Field with the given name if any exist in this document, or
+ /// <summary>Returns a field with the given name if any exist in this document, or
/// null. If multiple fields exists with this name, this method returns the
/// first value added.
/// </summary>
@@ -150,36 +152,39 @@
return null;
}
- /// <summary>Returns the string value of the Field with the given name if any exist in
+ /// <summary>Returns the string value of the field with the given name if any exist in
/// this document, or null. If multiple fields exist with this name, this
- /// method returns the first value added.
+ /// method returns the first value added. If only binary fields with this name
+ /// exist, returns null.
/// </summary>
public System.String Get(System.String name)
{
- Field field = GetField(name);
- if (field != null)
- return field.StringValue();
- else
- return null;
+ for (int i = 0; i < fields.Count; i++)
+ {
+ Field field = (Field) fields[i];
+ if (field.Name().Equals(name) && (!field.IsBinary()))
+ return field.StringValue();
+ }
+ return null;
}
/// <summary>Returns an Enumeration of all the fields in a document. </summary>
- public System.Collections.IEnumerable Fields()
+ public System.Collections.IEnumerator Fields()
{
- return (System.Collections.IEnumerable) fields;
+ return ((System.Collections.ArrayList) fields).GetEnumerator();
}
/// <summary> Returns an array of {@link Field}s with the given name.
/// This method can return <code>null</code>.
///
/// </summary>
- /// <param name="name">the name of the Field
+ /// <param name="name">the name of the field
/// </param>
/// <returns> a <code>Field[]</code> array
/// </returns>
public Field[] GetFields(System.String name)
{
- System.Collections.ArrayList result = new System.Collections.ArrayList();
+ System.Collections.ArrayList result = new System.Collections.ArrayList();
for (int i = 0; i < fields.Count; i++)
{
Field field = (Field) fields[i];
@@ -192,28 +197,100 @@
if (result.Count == 0)
return null;
- return (Field[]) result.ToArray(typeof(Field));
+ return (Field[]) result.ToArray(typeof(Field));
}
- /// <summary> Returns an array of values of the Field specified as the method parameter.
+ /// <summary> Returns an array of values of the field specified as the method parameter.
/// This method can return <code>null</code>.
///
/// </summary>
- /// <param name="name">the name of the Field
+ /// <param name="name">the name of the field
/// </param>
- /// <returns> a <code>String[]</code> of Field values
+ /// <returns> a <code>String[]</code> of field values
/// </returns>
public System.String[] GetValues(System.String name)
{
- Field[] namedFields = GetFields(name);
- if (namedFields == null)
+ System.Collections.ArrayList result = new System.Collections.ArrayList();
+ for (int i = 0; i < fields.Count; i++)
+ {
+ Field field = (Field) fields[i];
+ if (field.Name().Equals(name) && (!field.IsBinary()))
+ result.Add(field.StringValue());
+ }
+
+ if (result.Count == 0)
+ return null;
+
+ return (System.String[]) (result.ToArray(typeof(System.String)));
+ }
+
+ /// <summary> Returns an array of byte arrays, one for each of the fields that have the name
+ /// specified as the method parameter. This method will return <code>null</code> if no
+ /// binary fields with the specified name are available.
+ ///
+ /// </summary>
+ /// <param name="name">the name of the field
+ /// </param>
+ /// <returns> a <code>byte[][]</code> of binary field values.
+ /// </returns>
+ public byte[][] GetBinaryValues(System.String name)
+ {
+ System.Collections.IList result = new System.Collections.ArrayList();
+ for (int i = 0; i < fields.Count; i++)
+ {
+ Field field = (Field) fields[i];
+ if (field.Name().Equals(name) && (field.IsBinary()))
+ {
+ byte[] byteArray = field.BinaryValue();
+ byte[] resultByteArray = new byte[byteArray.Length];
+ for (int index = 0; index < byteArray.Length; index++)
+ resultByteArray[index] = (byte) byteArray[index];
+
+ result.Add(resultByteArray);
+ }
+ }
+
+ if (result.Count == 0)
return null;
- System.String[] values = new System.String[namedFields.Length];
- for (int i = 0; i < namedFields.Length; i++)
+
+ System.Collections.ICollection c = result;
+ System.Object[] objects = new byte[result.Count][];
+
+ System.Type type = objects.GetType().GetElementType();
+ System.Object[] objs = (System.Object[]) Array.CreateInstance(type, c.Count );
+
+ System.Collections.IEnumerator e = c.GetEnumerator();
+ int ii = 0;
+
+ while (e.MoveNext())
+ objs[ii++] = e.Current;
+
+ // If objects is smaller than c then do not return the new array in the parameter
+ if (objects.Length >= c.Count)
+ objs.CopyTo(objects, 0);
+
+ return (byte[][]) objs;
+ }
+
+ /// <summary> Returns an array of bytes for the first (or only) field that has the name
+ /// specified as the method parameter. This method will return <code>null</code>
+ /// if no binary fields with the specified name are available.
+ /// There may be non-binary fields with the same name.
+ ///
+ /// </summary>
+ /// <param name="name">the name of the field.
+ /// </param>
+ /// <returns> a <code>byte[]</code> containing the binary field value.
+ /// </returns>
+ public byte[] GetBinaryValue(System.String name)
+ {
+ for (int i = 0; i < fields.Count; i++)
{
- values[i] = namedFields[i].StringValue();
+ Field field = (Field) fields[i];
+ if (field.Name().Equals(name) && (field.IsBinary()))
+ return field.BinaryValue();
}
- return values;
+ return null;
}
/// <summary>Prints the fields of a document for human consumption. </summary>
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/Field.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Document/Field.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/Field.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/Field.cs Sat Jun 3 19:41:13 2006
@@ -13,14 +13,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using IndexReader = Lucene.Net.Index.IndexReader;
using Hits = Lucene.Net.Search.Hits;
using Similarity = Lucene.Net.Search.Similarity;
+using Parameter = Lucene.Net.Util.Parameter;
+
namespace Lucene.Net.Documents
{
- /// <summary>A Field is a section of a Document. Each Field has two parts, a name and a
+ /// <summary>A field is a section of a Document. Each field has two parts, a name and a
/// value. Values may be free text, provided as a String or as a Reader, or they
/// may be atomic keywords, which are not further processed. Such keywords may
/// be used to represent dates, urls, etc. Fields are optionally stored in the
@@ -31,21 +34,129 @@
public sealed class Field
{
private System.String name = "body";
- private System.String stringValue = null;
+
+ // the one and only data object for all different kind of field values
+ private System.Object fieldsData = null;
+
private bool storeTermVector = false;
- private System.IO.TextReader readerValue = null;
+ private bool storeOffsetWithTermVector = false;
+ private bool storePositionWithTermVector = false;
+ private bool omitNorms = false;
private bool isStored = false;
private bool isIndexed = true;
private bool isTokenized = true;
+ private bool isBinary = false;
+ private bool isCompressed = false;
private float boost = 1.0f;
- /// <summary>Sets the boost factor hits on this Field. This value will be
- /// multiplied into the score of all hits on this this Field of this
+ [Serializable]
+ public sealed class Store : Parameter
+ {
+
+ internal Store(System.String name) : base(name)
+ {
+ }
+
+ /// <summary>Store the original field value in the index in a compressed form. This is
+ /// useful for long documents and for binary valued fields.
+ /// </summary>
+ public static readonly Store COMPRESS = new Store("COMPRESS");
+
+ /// <summary>Store the original field value in the index. This is useful for short texts
+ /// like a document's title which should be displayed with the results. The
+ /// value is stored in its original form, i.e. no analyzer is used before it is
+ /// stored.
+ /// </summary>
+ public static readonly Store YES = new Store("YES");
+
+ /// <summary>Do not store the field value in the index. </summary>
+ public static readonly Store NO = new Store("NO");
+ }
+
+ [Serializable]
+ public sealed class Index : Parameter
+ {
+
+ internal Index(System.String name) : base(name)
+ {
+ }
+
+ /// <summary>Do not index the field value. This field can thus not be searched,
+ /// but one can still access its contents provided it is
+ /// {@link Field.Store stored}.
+ /// </summary>
+ public static readonly Index NO = new Index("NO");
+
+ /// <summary>Index the field's value so it can be searched. An Analyzer will be used
+ /// to tokenize and possibly further normalize the text before its
+ /// terms will be stored in the index. This is useful for common text.
+ /// </summary>
+ public static readonly Index TOKENIZED = new Index("TOKENIZED");
+
+ /// <summary>Index the field's value without using an Analyzer, so it can be searched.
+ /// As no analyzer is used the value will be stored as a single term. This is
+ /// useful for unique Ids like product numbers.
+ /// </summary>
+ public static readonly Index UN_TOKENIZED = new Index("UN_TOKENIZED");
+
+ /// <summary>Index the field's value without an Analyzer, and disable
+ /// the storing of norms. No norms means that index-time boosting
+ /// and field length normalization will be disabled. The benefit is
+ /// less memory usage as norms take up one byte per indexed field
+ /// for every document in the index.
+ /// </summary>
+ public static readonly Index NO_NORMS = new Index("NO_NORMS");
+ }
+
+ [Serializable]
+ public sealed class TermVector : Parameter
+ {
+
+ internal TermVector(System.String name) : base(name)
+ {
+ }
+
+ /// <summary>Do not store term vectors. </summary>
+ public static readonly TermVector NO = new TermVector("NO");
+
+ /// <summary>Store the term vectors of each document. A term vector is a list
+ /// of the document's terms and their number of occurences in that document.
+ /// </summary>
+ public static readonly TermVector YES = new TermVector("YES");
+
+ /// <summary> Store the term vector + token position information
+ ///
+ /// </summary>
+ /// <seealso cref="YES">
+ /// </seealso>
+ public static readonly TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");
+
+ /// <summary> Store the term vector + Token offset information
+ ///
+ /// </summary>
+ /// <seealso cref="YES">
+ /// </seealso>
+ public static readonly TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");
+
+ /// <summary> Store the term vector + Token position and offset information
+ ///
+ /// </summary>
+ /// <seealso cref="YES">
+ /// </seealso>
+ /// <seealso cref="WITH_POSITIONS">
+ /// </seealso>
+ /// <seealso cref="WITH_OFFSETS">
+ /// </seealso>
+ public static readonly TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
+ }
+
+ /// <summary>Sets the boost factor hits on this field. This value will be
+ /// multiplied into the score of all hits on this this field of this
/// document.
///
/// <p>The boost is multiplied by {@link Document#GetBoost()} of the document
- /// containing this Field. If a document has multiple fields with the same
+ /// containing this field. If a document has multiple fields with the same
/// name, all such values are multiplied together. This product is then
/// multipled by the value {@link Similarity#LengthNorm(String,int)}, and
/// rounded by {@link Similarity#EncodeNorm(float)} before it is stored in the
@@ -53,28 +164,28 @@
/// the range of that encoding.
///
/// </summary>
- /// <seealso cref="Document#SetBoost(float)">
+ /// <seealso cref="Document.SetBoost(float)">
/// </seealso>
- /// <seealso cref="int)">
+ /// <seealso cref="Similarity.LengthNorm(String, int)">
/// </seealso>
- /// <seealso cref="Similarity#EncodeNorm(float)">
+ /// <seealso cref="Similarity.EncodeNorm(float)">
/// </seealso>
public void SetBoost(float boost)
{
this.boost = boost;
}
- /// <summary>Returns the boost factor for hits on any Field of this document.
+ /// <summary>Returns the boost factor for hits for this field.
///
/// <p>The default value is 1.0.
///
/// <p>Note: this value is not stored directly with the document in the index.
- /// Documents returned from {@link IndexReader#Document(int)} and {@link
- /// Hits#Doc(int)} may thus not have the same value present as when this Field
- /// was indexed.
+ /// Documents returned from {@link IndexReader#Document(int)} and
+ /// {@link Hits#Doc(int)} may thus not have the same value present as when
+ /// this field was indexed.
///
/// </summary>
- /// <seealso cref="#SetBoost(float)">
+ /// <seealso cref="SetBoost(float)">
/// </seealso>
public float GetBoost()
{
@@ -84,14 +195,20 @@
/// <summary>Constructs a String-valued Field that is not tokenized, but is indexed
/// and stored. Useful for non-text fields, e.g. date or url.
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index)
+ /// Field(name, value, Field.Store.YES, Field.Index.UN_TOKENIZED)} instead
+ /// </deprecated>
public static Field Keyword(System.String name, System.String value_Renamed)
{
return new Field(name, value_Renamed, true, true, false);
}
/// <summary>Constructs a String-valued Field that is not tokenized nor indexed,
- /// but is stored in the index, for return with hits.
+ /// but is stored in the index, for return with hits.
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index)
+ /// Field(name, value, Field.Store.YES, Field.Index.NO)} instead
+ /// </deprecated>
public static Field UnIndexed(System.String name, System.String value_Renamed)
{
return new Field(name, value_Renamed, true, false, false);
@@ -99,16 +216,22 @@
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
/// and is stored in the index, for return with hits. Useful for short text
- /// fields, like "title" or "subject". Term vector will not be stored for this Field.
+ /// fields, like "title" or "subject". Term vector will not be stored for this field.
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index)
+ /// Field(name, value, Field.Store.YES, Field.Index.TOKENIZED)} instead
+ /// </deprecated>
public static Field Text(System.String name, System.String value_Renamed)
{
return Text(name, value_Renamed, false);
}
/// <summary>Constructs a Date-valued Field that is not tokenized and is indexed,
- /// and stored in the index, for return with hits.
+ /// and stored in the index, for return with hits.
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index)
+ /// Field(name, value, Field.Store.YES, Field.Index.UN_TOKENIZED)} instead
+ /// </deprecated>
public static Field Keyword(System.String name, System.DateTime value_Renamed)
{
return new Field(name, DateField.DateToString(value_Renamed), true, true, false);
@@ -116,24 +239,33 @@
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
/// and is stored in the index, for return with hits. Useful for short text
- /// fields, like "title" or "subject".
+ /// fields, like "title" or "subject".
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index, Field.TermVector)
+ /// Field(name, value, Field.Store.YES, Field.Index.TOKENIZED, storeTermVector)} instead
+ /// </deprecated>
public static Field Text(System.String name, System.String value_Renamed, bool storeTermVector)
{
return new Field(name, value_Renamed, true, true, true, storeTermVector);
}
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
- /// but that is not stored in the index. Term vector will not be stored for this Field.
+ /// but that is not stored in the index. Term vector will not be stored for this field.
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index)
+ /// Field(name, value, Field.Store.NO, Field.Index.TOKENIZED)} instead
+ /// </deprecated>
public static Field UnStored(System.String name, System.String value_Renamed)
{
return UnStored(name, value_Renamed, false);
}
/// <summary>Constructs a String-valued Field that is tokenized and indexed,
- /// but that is not stored in the index.
+ /// but that is not stored in the index.
/// </summary>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index, Field.TermVector)
+ /// Field(name, value, Field.Store.NO, Field.Index.TOKENIZED, storeTermVector)} instead
+ /// </deprecated>
public static Field UnStored(System.String name, System.String value_Renamed, bool storeTermVector)
{
return new Field(name, value_Renamed, false, true, true, storeTermVector);
@@ -141,8 +273,10 @@
/// <summary>Constructs a Reader-valued Field that is tokenized and indexed, but is
/// not stored in the index verbatim. Useful for longer text fields, like
- /// "body". Term vector will not be stored for this Field.
+ /// "body". Term vector will not be stored for this field.
/// </summary>
+ /// <deprecated> use {@link #Field(String, Reader) Field(name, value)} instead
+ /// </deprecated>
public static Field Text(System.String name, System.IO.TextReader value_Renamed)
{
return Text(name, value_Renamed, false);
@@ -150,8 +284,11 @@
/// <summary>Constructs a Reader-valued Field that is tokenized and indexed, but is
/// not stored in the index verbatim. Useful for longer text fields, like
- /// "body".
+ /// "body".
/// </summary>
+ /// <deprecated> use {@link #Field(String, Reader, Field.TermVector)
+ /// Field(name, value, storeTermVector)} instead
+ /// </deprecated>
public static Field Text(System.String name, System.IO.TextReader value_Renamed, bool storeTermVector)
{
Field f = new Field(name, value_Renamed);
@@ -159,80 +296,320 @@
return f;
}
- /// <summary>The name of the Field (e.g., "date", "subject", "title", or "body")
- /// as an interned string.
+ /// <summary>Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
/// </summary>
public System.String Name()
{
return name;
}
- /// <summary>The value of the Field as a String, or null. If null, the Reader value
- /// is used. Exactly one of stringValue() and readerValue() must be set.
+ /// <summary>The value of the field as a String, or null. If null, the Reader value
+ /// or binary value is used. Exactly one of stringValue(), readerValue(), and
+ /// binaryValue() must be set.
/// </summary>
public System.String StringValue()
{
- return stringValue;
+ return fieldsData is System.String ? (System.String) fieldsData : null;
}
- /// <summary>The value of the Field as a Reader, or null. If null, the String value
- /// is used. Exactly one of stringValue() and readerValue() must be set.
+
+ /// <summary>The value of the field as a Reader, or null. If null, the String value
+ /// or binary value is used. Exactly one of stringValue(), readerValue(),
+ /// and binaryValue() must be set.
/// </summary>
public System.IO.TextReader ReaderValue()
{
- return readerValue;
+ return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null;
}
+ /// <summary>The value of the field in Binary, or null. If null, the Reader or
+ /// String value is used. Exactly one of stringValue(), readerValue() and
+ /// binaryValue() must be set.
+ /// </summary>
+ public byte[] BinaryValue()
+ {
+ return fieldsData is byte[] ? (byte[]) fieldsData : null;
+ }
+
+ /// <summary> Create a field by specifying its name, value and how it will
+ /// be saved in the index. Term vectors will not be stored in the index.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value">The string to process
+ /// </param>
+ /// <param name="store">Whether <code>value</code> should be stored in the index
+ /// </param>
+ /// <param name="index">Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ /// </param>
+ /// <throws> NullPointerException if name or value is <code>null</code> </throws>
+ /// <throws> IllegalArgumentException if the field is neither stored nor indexed </throws>
+ public Field(System.String name, System.String value_Renamed, Store store, Index index) : this(name, value_Renamed, store, index, TermVector.NO)
+ {
+ }
+
+ /// <summary> Create a field by specifying its name, value and how it will
+ /// be saved in the index.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value">The string to process
+ /// </param>
+ /// <param name="store">Whether <code>value</code> should be stored in the index
+ /// </param>
+ /// <param name="index">Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ /// </param>
+ /// <param name="termVector">Whether term vector should be stored
+ /// </param>
+ /// <throws> NullPointerException if name or value is <code>null</code> </throws>
+ /// <throws> IllegalArgumentException in any of the following situations: </throws>
+ /// <summary> <ul>
+ /// <li>the field is neither stored nor indexed</li>
+ /// <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
+ /// </ul>
+ /// </summary>
+ public Field(System.String name, System.String value_Renamed, Store store, Index index, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (value_Renamed == null)
+ throw new System.NullReferenceException("value cannot be null");
+ if (index == Index.NO && store == Store.NO)
+ throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
+ if (index == Index.NO && termVector != TermVector.NO)
+ throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
+
+ this.name = String.Intern(name); // field names are interned
+ this.fieldsData = value_Renamed;
+
+ if (store == Store.YES)
+ {
+ this.isStored = true;
+ this.isCompressed = false;
+ }
+ else if (store == Store.COMPRESS)
+ {
+ this.isStored = true;
+ this.isCompressed = true;
+ }
+ else if (store == Store.NO)
+ {
+ this.isStored = false;
+ this.isCompressed = false;
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown store parameter " + store);
+ }
+
+ if (index == Index.NO)
+ {
+ this.isIndexed = false;
+ this.isTokenized = false;
+ }
+ else if (index == Index.TOKENIZED)
+ {
+ this.isIndexed = true;
+ this.isTokenized = true;
+ }
+ else if (index == Index.UN_TOKENIZED)
+ {
+ this.isIndexed = true;
+ this.isTokenized = false;
+ }
+ else if (index == Index.NO_NORMS)
+ {
+ this.isIndexed = true;
+ this.isTokenized = false;
+ this.omitNorms = true;
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown index parameter " + index);
+ }
+
+ this.isBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
+ /// not be stored.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="reader">The reader with the content
+ /// </param>
+ /// <throws> NullPointerException if name or reader is <code>null</code> </throws>
+ public Field(System.String name, System.IO.TextReader reader) : this(name, reader, TermVector.NO)
+ {
+ }
+
+ /// <summary> Create a tokenized and indexed field that is not stored, optionally with
+ /// storing term vectors.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="reader">The reader with the content
+ /// </param>
+ /// <param name="termVector">Whether term vector should be stored
+ /// </param>
+ /// <throws> NullPointerException if name or reader is <code>null</code> </throws>
+ public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (reader == null)
+ throw new System.NullReferenceException("reader cannot be null");
+
+ this.name = String.Intern(name); // field names are interned
+ this.fieldsData = reader;
+
+ this.isStored = false;
+ this.isCompressed = false;
+
+ this.isIndexed = true;
+ this.isTokenized = true;
+
+ this.isBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
- /// <summary>Create a Field by specifying all parameters except for <code>storeTermVector</code>,
+ /// <summary>Create a field by specifying all parameters except for <code>storeTermVector</code>,
/// which is set to <code>false</code>.
+ ///
/// </summary>
- public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token):this(name, string_Renamed, store, index, token, false)
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index)} instead
+ /// </deprecated>
+ public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token) : this(name, string_Renamed, store, index, token, false)
{
}
+
+ /// <summary> Create a stored field with binary value. Optionally the value may be compressed.
+ ///
+ /// </summary>
+ /// <param name="name">The name of the field
+ /// </param>
+ /// <param name="value">The binary value
+ /// </param>
+ /// <param name="store">How <code>value</code> should be stored (compressed or not.)
+ /// </param>
+ public Field(System.String name, byte[] value_Renamed, Store store)
+ {
+ if (name == null)
+ throw new System.ArgumentException("name cannot be null");
+ if (value_Renamed == null)
+ throw new System.ArgumentException("value cannot be null");
+
+ this.name = String.Intern(name);
+ this.fieldsData = value_Renamed;
+
+ if (store == Store.YES)
+ {
+ this.isStored = true;
+ this.isCompressed = false;
+ }
+ else if (store == Store.COMPRESS)
+ {
+ this.isStored = true;
+ this.isCompressed = true;
+ }
+ else if (store == Store.NO)
+ throw new System.ArgumentException("binary values can't be unstored");
+ else
+ {
+ throw new System.ArgumentException("unknown store parameter " + store);
+ }
+
+ this.isIndexed = false;
+ this.isTokenized = false;
+
+ this.isBinary = true;
+
+ SetStoreTermVector(TermVector.NO);
+ }
+
/// <summary> </summary>
- /// <param name="name">The name of the Field
+ /// <param name="name">The name of the field
/// </param>
/// <param name="string">The string to process
/// </param>
- /// <param name="store">true if the Field should store the string
+ /// <param name="store">true if the field should store the string
/// </param>
- /// <param name="index">true if the Field should be indexed
+ /// <param name="index">true if the field should be indexed
/// </param>
- /// <param name="token">true if the Field should be tokenized
+ /// <param name="token">true if the field should be tokenized
/// </param>
/// <param name="storeTermVector">true if we should store the Term Vector info
+ ///
/// </param>
+ /// <deprecated> use {@link #Field(String, String, Field.Store, Field.Index, Field.TermVector)} instead
+ /// </deprecated>
public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token, bool storeTermVector)
{
if (name == null)
- throw new System.ArgumentException("name cannot be null");
+ throw new System.NullReferenceException("name cannot be null");
if (string_Renamed == null)
- throw new System.ArgumentException("value cannot be null");
+ throw new System.NullReferenceException("value cannot be null");
if (!index && storeTermVector)
- throw new System.ArgumentException("cannot store a term vector for fields that are not indexed.");
+ throw new System.ArgumentException("cannot store a term vector for fields that are not indexed");
- this.name = String.Intern(name); // Field names are interned
- this.stringValue = string_Renamed;
+ this.name = String.Intern(name); // field names are interned
+ this.fieldsData = string_Renamed;
this.isStored = store;
this.isIndexed = index;
this.isTokenized = token;
this.storeTermVector = storeTermVector;
}
- internal Field(System.String name, System.IO.TextReader reader)
+ private void SetStoreTermVector(TermVector termVector)
{
- if (name == null)
- throw new System.ArgumentException("name cannot be null");
- if (reader == null)
- throw new System.ArgumentException("value cannot be null");
-
- this.name = String.Intern(name); // Field names are interned
- this.readerValue = reader;
+ if (termVector == TermVector.NO)
+ {
+ this.storeTermVector = false;
+ this.storePositionWithTermVector = false;
+ this.storeOffsetWithTermVector = false;
+ }
+ else if (termVector == TermVector.YES)
+ {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = false;
+ this.storeOffsetWithTermVector = false;
+ }
+ else if (termVector == TermVector.WITH_POSITIONS)
+ {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = true;
+ this.storeOffsetWithTermVector = false;
+ }
+ else if (termVector == TermVector.WITH_OFFSETS)
+ {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = false;
+ this.storeOffsetWithTermVector = true;
+ }
+ else if (termVector == TermVector.WITH_POSITIONS_OFFSETS)
+ {
+ this.storeTermVector = true;
+ this.storePositionWithTermVector = true;
+ this.storeOffsetWithTermVector = true;
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown termVector parameter " + termVector);
+ }
}
- /// <summary>True iff the value of the Field is to be stored in the index for return
- /// with search hits. It is an error for this to be true if a Field is
+ /// <summary>True iff the value of the field is to be stored in the index for return
+ /// with search hits. It is an error for this to be true if a field is
/// Reader-valued.
/// </summary>
public bool IsStored()
@@ -240,7 +617,7 @@
return isStored;
}
- /// <summary>True iff the value of the Field is to be indexed, so that it may be
+ /// <summary>True iff the value of the field is to be indexed, so that it may be
/// searched on.
/// </summary>
public bool IsIndexed()
@@ -248,7 +625,7 @@
return isIndexed;
}
- /// <summary>True iff the value of the Field should be tokenized as text prior to
+ /// <summary>True iff the value of the field should be tokenized as text prior to
/// indexing. Un-tokenized fields are indexed as a single word and may not be
/// Reader-valued.
/// </summary>
@@ -257,41 +634,125 @@
return isTokenized;
}
- /// <summary>True iff the term or terms used to index this Field are stored as a term
+ /// <summary>True if the value of the field is stored and compressed within the index </summary>
+ public bool IsCompressed()
+ {
+ return isCompressed;
+ }
+
+ /// <summary>True iff the term or terms used to index this field are stored as a term
/// vector, available from {@link IndexReader#GetTermFreqVector(int,String)}.
- /// These methods do not provide access to the original content of the Field,
+ /// These methods do not provide access to the original content of the field,
/// only to terms used to index it. If the original content must be
/// preserved, use the <code>stored</code> attribute instead.
///
/// </summary>
- /// <seealso cref="String)">
+ /// <seealso cref="IndexReader.GetTermFreqVector(int, String)">
/// </seealso>
public bool IsTermVectorStored()
{
return storeTermVector;
}
+ /// <summary> True iff terms are stored as term vector together with their offsets
+ /// (start and end position in source text).
+ /// </summary>
+ public bool IsStoreOffsetWithTermVector()
+ {
+ return storeOffsetWithTermVector;
+ }
+
+ /// <summary> True iff terms are stored as term vector together with their token positions.</summary>
+ public bool IsStorePositionWithTermVector()
+ {
+ return storePositionWithTermVector;
+ }
+
+ /// <summary>True iff the value of the field is stored as binary </summary>
+ public bool IsBinary()
+ {
+ return isBinary;
+ }
+
+ /// <summary>True if norms are omitted for this indexed field </summary>
+ public bool GetOmitNorms()
+ {
+ return omitNorms;
+ }
+
+ /// <summary>Expert:
+ ///
+ /// If set, omit normalization factors associated with this indexed field.
+ /// This effectively disables indexing boosts and length normalization for this field.
+ /// </summary>
+ public void SetOmitNorms(bool omitNorms)
+ {
+ this.omitNorms = omitNorms;
+ }
+
/// <summary>Prints a Field for human consumption. </summary>
public override System.String ToString()
{
- if (isStored && isIndexed && !isTokenized)
- return "Keyword<" + name + ":" + stringValue + ">";
- else if (isStored && !isIndexed && !isTokenized)
- return "Unindexed<" + name + ":" + stringValue + ">";
- else if (isStored && isIndexed && isTokenized && stringValue != null)
- return "Text<" + name + ":" + stringValue + ">";
- else if (!isStored && isIndexed && isTokenized && readerValue != null)
+ System.Text.StringBuilder result = new System.Text.StringBuilder();
+ if (isStored)
{
- return "Text<" + name + ":" + readerValue + ">";
+ result.Append("stored");
+ if (isCompressed)
+ result.Append("/compressed");
+ else
+ result.Append("/uncompressed");
}
- else if (!isStored && isIndexed && isTokenized)
+ if (isIndexed)
{
- return "UnStored<" + name + ">";
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("indexed");
}
- else
+ if (isTokenized)
{
- return base.ToString();
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("tokenized");
}
+ if (storeTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVector");
+ }
+ if (storeOffsetWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorPosition");
+ }
+ if (isBinary)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("binary");
+ }
+ if (omitNorms)
+ {
+ result.Append(",omitNorms");
+ }
+ result.Append('<');
+ result.Append(name);
+ result.Append(':');
+
+ if (fieldsData != null)
+ {
+ result.Append(fieldsData);
+ }
+
+ result.Append('>');
+ return result.ToString();
}
}
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/NumberTools.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Document/NumberTools.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/NumberTools.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/NumberTools.cs Sat Jun 3 19:41:13 2006
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Documents
+{
+
+ /// <summary> Provides support for converting longs to Strings, and back again. The strings
+ /// are structured so that lexicographic sorting order is preserved.
+ ///
+ /// <p>
+ /// That is, if l1 is less than l2 for any two longs l1 and l2, then
+ /// NumberTools.longToString(l1) is lexicographically less than
+ /// NumberTools.longToString(l2). (Similarly for "greater than" and "equals".)
+ ///
+ /// <p>
+ /// This class handles <b>all</b> long values (unlike
+ /// {@link Lucene.Net.document.DateField}).
+ ///
+ /// </summary>
+ /// <author> Matt Quail (spud at madbean dot com)
+ /// </author>
+ public class NumberTools
+ {
+
+ private const int RADIX = 16; // 36; {{Arousdh-1.9}} Java's is 36, but .NET's is 16; will this be an issue?
+
+ private const char NEGATIVE_PREFIX = '-';
+
+ // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
+ private const char POSITIVE_PREFIX = '0';
+
+ //NB: this must be less than
+ /// <summary> Equivalent to longToString(Long.MIN_VALUE)</summary>
+ public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000";
+
+ /// <summary> Equivalent to longToString(Long.MAX_VALUE)</summary>
+ public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7";
+
+ /// <summary> The length of (all) strings returned by {@link #longToString}</summary>
+ public static readonly int STR_SIZE = MIN_STRING_VALUE.Length;
+
+ /// <summary> Converts a long to a String suitable for indexing.</summary>
+ public static System.String LongToString(long l)
+ {
+
+ if (l == System.Int64.MinValue)
+ {
+ // special case, because long is not symmetric around zero
+ return MIN_STRING_VALUE;
+ }
+
+ System.Text.StringBuilder buf = new System.Text.StringBuilder(STR_SIZE);
+
+ if (l < 0)
+ {
+ buf.Append(NEGATIVE_PREFIX);
+ l = System.Int64.MaxValue + l + 1;
+ }
+ else
+ {
+ buf.Append(POSITIVE_PREFIX);
+ }
+ System.String num = System.Convert.ToString(l, RADIX);
+
+ int padLen = STR_SIZE - num.Length - buf.Length;
+ while (padLen-- > 0)
+ {
+ buf.Append('0');
+ }
+ buf.Append(num);
+
+ return buf.ToString();
+ }
+
+ /// <summary> Converts a String that was returned by {@link #longToString} back to a
+ /// long.
+ ///
+ /// </summary>
+ /// <throws> IllegalArgumentException </throws>
+ /// <summary> if the input is null
+ /// </summary>
+ /// <throws> NumberFormatException </throws>
+ /// <summary> if the input does not parse (it was not a String returned by
+ /// longToString()).
+ /// </summary>
+ public static long StringToLong(System.String str)
+ {
+ if (str == null)
+ {
+ throw new System.NullReferenceException("string cannot be null");
+ }
+ if (str.Length != STR_SIZE)
+ {
+ throw new System.FormatException("string is the wrong size");
+ }
+
+ if (str.Equals(MIN_STRING_VALUE))
+ {
+ return System.Int64.MinValue;
+ }
+
+ char prefix = str[0];
+ long l = System.Convert.ToInt64(str.Substring(1), RADIX);
+
+ if (prefix == POSITIVE_PREFIX)
+ {
+ // nop
+ }
+ else if (prefix == NEGATIVE_PREFIX)
+ {
+ l = l - System.Int64.MaxValue - 1;
+ }
+ else
+ {
+ throw new System.FormatException("string does not begin with the correct prefix");
+ }
+
+ return l;
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/CompoundFileReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileReader.cs Sat Jun 3 19:41:13 2006
@@ -13,11 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
using Lock = Lucene.Net.Store.Lock;
-using OutputStream = Lucene.Net.Store.OutputStream;
+
namespace Lucene.Net.Index
{
@@ -29,8 +32,8 @@
/// </summary>
/// <author> Dmitry Serebrennikov
/// </author>
- /// <version> $Id: CompoundFileReader.java,v 1.7 2004/07/12 14:36:04 otis Exp $
- /// </version>
+ /// <version> $Id: CompoundFileReader.java 208905 2005-07-03 10:40:01Z dnaber $
+ /// </version>
public class CompoundFileReader : Directory
{
@@ -45,10 +48,7 @@
private Directory directory;
private System.String fileName;
- // Reference count
- private bool open;
-
- private InputStream stream;
+ private IndexInput stream;
private System.Collections.Hashtable entries = new System.Collections.Hashtable();
@@ -61,7 +61,7 @@
try
{
- stream = dir.OpenFile(name);
+ stream = dir.OpenInput(name);
// read the directory and init files
int count = stream.ReadVInt();
@@ -98,7 +98,7 @@
{
stream.Close();
}
- catch (System.IO.IOException e)
+ catch (System.IO.IOException)
{
}
}
@@ -128,7 +128,7 @@
}
}
- public override InputStream OpenFile(System.String id)
+ public override IndexInput OpenInput(System.String id)
{
lock (this)
{
@@ -139,7 +139,7 @@
if (entry == null)
throw new System.IO.IOException("No sub-file with id " + id + " found");
- return new CSInputStream(stream, entry.offset, entry.length);
+ return new CSIndexInput(stream, entry.offset, entry.length);
}
}
@@ -148,7 +148,7 @@
{
System.String[] res = new System.String[entries.Count];
entries.Keys.CopyTo(res, 0);
- return res;
+ return res;
}
/// <summary>Returns true iff a file with the given name exists. </summary>
@@ -157,34 +157,34 @@
return entries.ContainsKey(name);
}
- /// <summary>Returns the time the named file was last modified. </summary>
+ /// <summary>Returns the time the compound file was last modified. </summary>
public override long FileModified(System.String name)
{
return directory.FileModified(fileName);
}
- /// <summary>Set the modified time of an existing file to now. </summary>
+ /// <summary>Set the modified time of the compound file to now. </summary>
public override void TouchFile(System.String name)
{
directory.TouchFile(fileName);
}
- /// <summary>Removes an existing file in the directory. </summary>
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
public override void DeleteFile(System.String name)
{
throw new System.NotSupportedException();
}
- /// <summary>Renames an existing file in the directory.
- /// If a file already exists with the new name, then it is replaced.
- /// This replacement should be atomic.
- /// </summary>
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
public override void RenameFile(System.String from, System.String to)
{
throw new System.NotSupportedException();
}
- /// <summary>Returns the length of a file in the directory. </summary>
+ /// <summary>Returns the length of a file in the directory.</summary>
+ /// <throws> IOException if the file does not exist </throws>
public override long FileLength(System.String name)
{
FileEntry e = (FileEntry) entries[name];
@@ -193,38 +193,37 @@
return e.length;
}
- /// <summary>Creates a new, empty file in the directory with the given name.
- /// Returns a stream writing this file.
- /// </summary>
- public override OutputStream CreateFile(System.String name)
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
+ public override IndexOutput CreateOutput(System.String name)
{
throw new System.NotSupportedException();
}
- /// <summary>Construct a {@link Lock}.</summary>
- /// <param name="name">the name of the lock file
- /// </param>
+ /// <summary>Not implemented</summary>
+ /// <throws> UnsupportedOperationException </throws>
public override Lock MakeLock(System.String name)
{
throw new System.NotSupportedException();
}
- /// <summary>Implementation of an InputStream that reads from a portion of the
+ /// <summary>Implementation of an IndexInput that reads from a portion of the
/// compound file. The visibility is left as "package" *only* because
/// this helps with testing since JUnit test cases in a different class
/// can then access package fields of this class.
/// </summary>
- public /*internal*/ sealed class CSInputStream : InputStream
+ public /*internal*/ sealed class CSIndexInput : BufferedIndexInput
{
- public /*internal*/ InputStream base_Renamed;
+ public /*internal*/ IndexInput base_Renamed;
internal long fileOffset;
+ internal long length;
- internal CSInputStream(InputStream base_Renamed, long fileOffset, long length)
+ internal CSIndexInput(IndexInput base_Renamed, long fileOffset, long length)
{
this.base_Renamed = base_Renamed;
this.fileOffset = fileOffset;
- this.length = length; // variable in the superclass
+ this.length = length;
}
/// <summary>Expert: implements buffer refill. Reads bytes from the current
@@ -234,7 +233,7 @@
/// </param>
/// <param name="offset">the offset in the array to start storing bytes
/// </param>
- /// <param name="length">the number of bytes to read
+ /// <param name="len">the number of bytes to read
/// </param>
public override void ReadInternal(byte[] b, int offset, int len)
{
@@ -251,15 +250,20 @@
/// <summary>Expert: implements seek. Sets current position in this file, where
/// the next {@link #ReadInternal(byte[],int,int)} will occur.
/// </summary>
- /// <seealso cref="#ReadInternal(byte[],int,int)">
+ /// <seealso cref="ReadInternal(byte[],int,int)">
/// </seealso>
public override void SeekInternal(long pos)
{
}
- /// <summary>Closes the stream to futher operations. </summary>
+ /// <summary>Closes the stream to further operations. </summary>
public override void Close()
{
+ }
+
+ public override long Length()
+ {
+ return length;
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/CompoundFileWriter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/CompoundFileWriter.cs Sat Jun 3 19:41:13 2006
@@ -13,10 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
namespace Lucene.Net.Index
{
@@ -29,7 +31,7 @@
/// fileCount entries with the following structure:</li>
/// <ul>
/// <li>long dataOffset</li>
- /// <li>UTFString extension</li>
+ /// <li>String fileName</li>
/// </ul>
/// <li>{File Data}
/// fileCount entries with the raw data of the corresponding file</li>
@@ -37,15 +39,15 @@
///
/// The fileCount integer indicates how many files are contained in this compound
/// file. The {directory} that follows has that many entries. Each directory entry
- /// contains an encoding identifier, an long pointer to the start of this file's
- /// data section, and a UTF String with that file's extension.
+ /// contains a long pointer to the start of this file's data section, and a String
+ /// with that file's name.
///
/// </summary>
/// <author> Dmitry Serebrennikov
/// </author>
- /// <version> $Id: CompoundFileWriter.java,v 1.3 2004/03/29 22:48:02 cutting Exp $
+ /// <version> $Id: CompoundFileWriter.java 179621 2005-06-02 18:18:50Z dnaber $
/// </version>
- sealed public class CompoundFileWriter
+ public sealed class CompoundFileWriter
{
private sealed class FileEntry
@@ -71,12 +73,13 @@
/// <summary>Create the compound stream in the specified file. The file name is the
/// entire name (no extensions are added).
/// </summary>
+ /// <throws> NullPointerException if <code>dir</code> or <code>name</code> is null </throws>
public CompoundFileWriter(Directory dir, System.String name)
{
if (dir == null)
- throw new System.ArgumentException("Missing directory");
+ throw new System.NullReferenceException("directory cannot be null");
if (name == null)
- throw new System.ArgumentException("Missing name");
+ throw new System.NullReferenceException("name cannot be null");
directory = dir;
fileName = name;
@@ -96,11 +99,14 @@
return fileName;
}
- /// <summary>Add a source stream. If sourceDir is null, it is set to the
- /// same value as the directory where this compound stream exists.
- /// The id is the string by which the sub-stream will be know in the
- /// compound stream. The caller must ensure that the ID is unique. If the
- /// id is null, it is set to the name of the source file.
+ /// <summary>Add a source stream. <code>file</code> is the string by which the
+ /// sub-stream will be known in the compound stream.
+ ///
+ /// </summary>
+ /// <throws> IllegalStateException if this writer is closed </throws>
+ /// <throws> NullPointerException if <code>file</code> is null </throws>
+ /// <throws> IllegalArgumentException if a file with the same name </throws>
+ /// <summary> has been added already
/// </summary>
public void AddFile(System.String file)
{
@@ -108,7 +114,7 @@
throw new System.SystemException("Can't add extensions after merge has been called");
if (file == null)
- throw new System.ArgumentException("Missing source file");
+ throw new System.NullReferenceException("file cannot be null");
try
{
@@ -129,6 +135,9 @@
/// compound stream. After successful merge, the source files
/// are deleted.
/// </summary>
+ /// <throws> IllegalStateException if close() had been called before or </throws>
+ /// <summary> if no file has been added to this object
+ /// </summary>
public void Close()
{
if (merged)
@@ -140,10 +149,10 @@
merged = true;
// open the compound stream
- OutputStream os = null;
+ IndexOutput os = null;
try
{
- os = directory.CreateFile(fileName);
+ os = directory.CreateOutput(fileName);
// Write the number of entries
os.WriteVInt(entries.Count);
@@ -161,7 +170,7 @@
}
// Open the files and copy their data into the stream.
- // Remeber the locations of each file's data section.
+ // Remember the locations of each file's data section.
byte[] buffer = new byte[1024];
it = entries.GetEnumerator();
while (it.MoveNext())
@@ -184,7 +193,7 @@
// close so that if an exception occurs during the close, the
// finally clause below will not attempt to close the stream
// the second time.
- OutputStream tmp = os;
+ IndexOutput tmp = os;
os = null;
tmp.Close();
}
@@ -205,14 +214,14 @@
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
- private void CopyFile(FileEntry source, OutputStream os, byte[] buffer)
+ private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
{
- InputStream is_Renamed = null;
+ IndexInput is_Renamed = null;
try
{
long startPtr = os.GetFilePointer();
- is_Renamed = directory.OpenFile(source.file);
+ is_Renamed = directory.OpenInput(source.file);
long length = is_Renamed.Length();
long remainder = length;
int chunk = buffer.Length;
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/DocumentWriter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentWriter.cs Sat Jun 3 19:41:13 2006
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using Token = Lucene.Net.Analysis.Token;
@@ -21,43 +22,63 @@
using Field = Lucene.Net.Documents.Field;
using Similarity = Lucene.Net.Search.Similarity;
using Directory = Lucene.Net.Store.Directory;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
namespace Lucene.Net.Index
{
- sealed public class DocumentWriter
+ public sealed class DocumentWriter
{
+ private void InitBlock()
+ {
+ termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
+ }
private Analyzer analyzer;
private Directory directory;
private Similarity similarity;
private FieldInfos fieldInfos;
private int maxFieldLength;
+ private int termIndexInterval;
+ private System.IO.TextWriter infoStream;
- /// <summary> </summary>
+ /// <summary>This ctor used by test code only.
+ ///
+ /// </summary>
/// <param name="directory">The directory to write the document information to
/// </param>
/// <param name="analyzer">The analyzer to use for the document
/// </param>
/// <param name="similarity">The Similarity function
/// </param>
- /// <param name="maxFieldLength">The maximum number of tokens a Field may have
+ /// <param name="maxFieldLength">The maximum number of tokens a field may have
/// </param>
- public /*internal*/ DocumentWriter(Directory directory, Analyzer analyzer, Similarity similarity, int maxFieldLength)
+ public DocumentWriter(Directory directory, Analyzer analyzer, Similarity similarity, int maxFieldLength)
{
+ InitBlock();
this.directory = directory;
this.analyzer = analyzer;
this.similarity = similarity;
this.maxFieldLength = maxFieldLength;
}
- /*internal*/ public void AddDocument(System.String segment, Document doc)
+ public DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer)
{
- // write Field names
+ InitBlock();
+ this.directory = directory;
+ this.analyzer = analyzer;
+ this.similarity = writer.GetSimilarity();
+ this.maxFieldLength = writer.GetMaxFieldLength();
+ this.termIndexInterval = writer.GetTermIndexInterval();
+ }
+
+ public /*internal*/ void AddDocument(System.String segment, Document doc)
+ {
+ // write field names
fieldInfos = new FieldInfos();
fieldInfos.Add(doc);
fieldInfos.Write(directory, segment + ".fnm");
- // write Field values
+ // write field values
FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
try
{
@@ -72,6 +93,7 @@
postingTable.Clear(); // clear postingTable
fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
+ fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
float boost = doc.GetBoost();
@@ -102,7 +124,7 @@
WritePostings(postings, segment);
// write norms of indexed fields
- WriteNorms(doc, segment);
+ WriteNorms(segment);
}
// Keys are Terms, values are Postings.
@@ -110,25 +132,36 @@
private System.Collections.Hashtable postingTable = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
private int[] fieldLengths;
private int[] fieldPositions;
+ private int[] fieldOffsets;
private float[] fieldBoosts;
// Tokenizes the fields of a document into Postings.
private void InvertDocument(Document doc)
{
- foreach(Field field in doc.Fields())
- {
+ System.Collections.IEnumerator fields = doc.Fields();
+ while (fields.MoveNext())
+ {
+ Field field = (Field) fields.Current;
System.String fieldName = field.Name();
int fieldNumber = fieldInfos.FieldNumber(fieldName);
- int length = fieldLengths[fieldNumber]; // length of Field
- int position = fieldPositions[fieldNumber]; // position in Field
+ int length = fieldLengths[fieldNumber]; // length of field
+ int position = fieldPositions[fieldNumber]; // position in field
+ if (length > 0)
+ position += analyzer.GetPositionIncrementGap(fieldName);
+ int offset = fieldOffsets[fieldNumber]; // offset field
if (field.IsIndexed())
{
if (!field.IsTokenized())
{
- // un-tokenized Field
- AddPosition(fieldName, field.StringValue(), position++);
+ // un-tokenized field
+ System.String stringValue = field.StringValue();
+ if (field.IsStoreOffsetWithTermVector())
+ AddPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.Length));
+ else
+ AddPosition(fieldName, stringValue, position++, null);
+ offset += stringValue.Length;
length++;
}
else
@@ -139,19 +172,33 @@
else if (field.StringValue() != null)
reader = new System.IO.StringReader(field.StringValue());
else
- throw new System.ArgumentException("Field must have either String or Reader value");
+ throw new System.ArgumentException("field must have either String or Reader value");
- // Tokenize Field and add to postingTable
+ // Tokenize field and add to postingTable
TokenStream stream = analyzer.TokenStream(fieldName, reader);
try
{
+ Token lastToken = null;
for (Token t = stream.Next(); t != null; t = stream.Next())
{
position += (t.GetPositionIncrement() - 1);
- AddPosition(fieldName, t.TermText(), position++);
+
+ if (field.IsStoreOffsetWithTermVector())
+ AddPosition(fieldName, t.TermText(), position++, new TermVectorOffsetInfo(offset + t.StartOffset(), offset + t.EndOffset()));
+ else
+ AddPosition(fieldName, t.TermText(), position++, null);
+
+ lastToken = t;
if (++length > maxFieldLength)
+ {
+ if (infoStream != null)
+ infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
break;
+ }
}
+
+ if (lastToken != null)
+ offset += lastToken.EndOffset() + 1;
}
finally
{
@@ -159,18 +206,20 @@
}
}
- fieldLengths[fieldNumber] = length; // save Field length
- fieldPositions[fieldNumber] = position; // save Field position
+ fieldLengths[fieldNumber] = length; // save field length
+ fieldPositions[fieldNumber] = position; // save field position
fieldBoosts[fieldNumber] *= field.GetBoost();
+ fieldOffsets[fieldNumber] = offset;
}
}
}
private Term termBuffer = new Term("", ""); // avoid consing
- private void AddPosition(System.String field, System.String text, int position)
+ private void AddPosition(System.String field, System.String text, int position, TermVectorOffsetInfo offset)
{
termBuffer.Set(field, text);
+ //System.out.println("Offset: " + offset);
Posting ti = (Posting) postingTable[termBuffer];
if (ti != null)
{
@@ -187,13 +236,28 @@
ti.positions = newPositions;
}
ti.positions[freq] = position; // add new position
+
+ if (offset != null)
+ {
+ if (ti.offsets.Length == freq)
+ {
+ TermVectorOffsetInfo[] newOffsets = new TermVectorOffsetInfo[freq * 2];
+ TermVectorOffsetInfo[] offsets = ti.offsets;
+ for (int i = 0; i < freq; i++)
+ {
+ newOffsets[i] = offsets[i];
+ }
+ ti.offsets = newOffsets;
+ }
+ ti.offsets[freq] = offset;
+ }
ti.freq = freq + 1; // update frequency
}
else
{
// word not seen before
Term term = new Term(field, text, false);
- postingTable[term] = new Posting(term, position);
+ postingTable[term] = new Posting(term, position, offset);
}
}
@@ -276,15 +340,15 @@
private void WritePostings(Posting[] postings, System.String segment)
{
- OutputStream freq = null, prox = null;
+ IndexOutput freq = null, prox = null;
TermInfosWriter tis = null;
TermVectorsWriter termVectorWriter = null;
try
{
//open files for inverse index storage
- freq = directory.CreateFile(segment + ".frq");
- prox = directory.CreateFile(segment + ".prx");
- tis = new TermInfosWriter(directory, segment, fieldInfos);
+ freq = directory.CreateOutput(segment + ".frq");
+ prox = directory.CreateOutput(segment + ".prx");
+ tis = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
TermInfo ti = new TermInfo();
System.String currentField = null;
@@ -317,11 +381,11 @@
prox.WriteVInt(position - lastPosition);
lastPosition = position;
}
- // check to see if we switched to a new Field
+ // check to see if we switched to a new field
System.String termField = posting.term.Field();
if (currentField != termField)
{
- // changing Field - see if there is something to save
+ // changing field - see if there is something to save
currentField = termField;
FieldInfo fi = fieldInfos.FieldInfo(currentField);
if (fi.storeTermVector)
@@ -340,7 +404,7 @@
}
if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
{
- termVectorWriter.AddTerm(posting.term.Text(), postingFreq);
+ termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
}
}
if (termVectorWriter != null)
@@ -393,23 +457,23 @@
}
if (keep != null)
{
- throw new System.IO.IOException(keep.StackTrace);
+ throw new System.IO.IOException(keep.StackTrace);
}
}
}
- private void WriteNorms(Document doc, System.String segment)
+ private void WriteNorms(System.String segment)
{
for (int n = 0; n < fieldInfos.Size(); n++)
{
FieldInfo fi = fieldInfos.FieldInfo(n);
- if (fi.isIndexed)
+ if (fi.isIndexed && !fi.omitNorms)
{
float norm = fieldBoosts[n] * similarity.LengthNorm(fi.name, fieldLengths[n]);
- OutputStream norms = directory.CreateFile(segment + ".f" + n);
+ IndexOutput norms = directory.CreateOutput(segment + ".f" + n);
try
{
- norms.WriteByte(Lucene.Net.Search.Similarity.EncodeNorm(norm));
+ norms.WriteByte(Similarity.EncodeNorm(norm));
}
finally
{
@@ -418,6 +482,12 @@
}
}
}
+
+ /// <summary>If non-null, a message will be printed to this if maxFieldLength is reached.</summary>
+ internal void SetInfoStream(System.IO.TextWriter infoStream)
+ {
+ this.infoStream = infoStream;
+ }
}
sealed class Posting
@@ -426,13 +496,21 @@
internal Term term; // the Term
internal int freq; // its frequency in doc
internal int[] positions; // positions it occurs at
+ internal TermVectorOffsetInfo[] offsets;
- internal Posting(Term t, int position)
+ internal Posting(Term t, int position, TermVectorOffsetInfo offset)
{
term = t;
freq = 1;
positions = new int[1];
positions[0] = position;
+ if (offset != null)
+ {
+ offsets = new TermVectorOffsetInfo[1];
+ offsets[0] = offset;
+ }
+ else
+ offsets = null;
}
}
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldInfo.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs Sat Jun 3 19:41:13 2006
@@ -13,24 +13,39 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
using System;
+
namespace Lucene.Net.Index
{
- sealed public class FieldInfo
+
+ public sealed class FieldInfo
{
internal System.String name;
internal bool isIndexed;
internal int number;
- // true if term vector for this Field should be stored
+ // true if term vector for this field should be stored
public /*internal*/ bool storeTermVector;
+ public /*internal*/ bool storeOffsetWithTermVector;
+ public /*internal*/ bool storePositionWithTermVector;
- internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector)
+ public /*internal*/ bool omitNorms; // omit norms associated with indexed fields
+
+ public bool IsIndexed
+ {
+ get { return isIndexed; }
+ }
+
+ internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
{
name = na;
isIndexed = tk;
number = nu;
this.storeTermVector = storeTermVector;
+ this.storeOffsetWithTermVector = storeOffsetWithTermVector;
+ this.storePositionWithTermVector = storePositionWithTermVector;
+ this.omitNorms = omitNorms;
}
}
}