You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by do...@apache.org on 2009/07/29 20:04:24 UTC
svn commit: r798995 [7/35] - in /incubator/lucene.net/trunk/C#/src:
Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/
Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/
Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net/...
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriterThreadState.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/DocumentsWriterThreadState.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriterThreadState.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriterThreadState.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+ /// <summary>
+ /// Used by DocumentsWriter to maintain per-thread state.
+ /// We keep a separate Posting hash and other state for each
+ /// thread and then merge postings hashes from all threads
+ /// when writing the segment.
+ /// </summary>
+ sealed internal class DocumentsWriterThreadState
+ {
+ internal bool isIdle = true; // false if this is currently in use by a thread
+ internal int numThreads = 1; // Number of threads that share this instance
+ internal bool doFlushAfter; // true if we should flush after processing current doc
+ internal readonly DocConsumerPerThread consumer; // per-thread consumer returned by docWriter.consumer.addThread(this)
+ internal readonly DocumentsWriter.DocState docState; // created in the constructor and seeded from docWriter's settings
+
+ internal readonly DocumentsWriter docWriter; // the DocumentsWriter passed to the constructor
+
+ public DocumentsWriterThreadState(DocumentsWriter docWriter) // builds the DocState, then asks the writer's consumer for a per-thread consumer
+ {
+ this.docWriter = docWriter;
+ docState = new DocumentsWriter.DocState();
+ docState.maxFieldLength = docWriter.maxFieldLength; // copy the writer's current settings into the per-thread DocState
+ docState.infoStream = docWriter.infoStream;
+ docState.similarity = docWriter.similarity;
+ docState.docWriter = docWriter;
+ consumer = docWriter.consumer.addThread(this); // done last, after docState has been populated
+ }
+
+ internal void doAfterFlush() // sets numThreads back to 0 and clears the pending-flush flag
+ {
+ numThreads = 0;
+ doFlushAfter = false;
+ }
+ }
+}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldInfo.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs Wed Jul 29 18:04:12 2009
@@ -32,15 +32,16 @@
public bool storePositionWithTermVector;
public bool omitNorms; // omit norms associated with indexed fields
-
- public bool IsIndexed()
- {
- return isIndexed;
- }
-
+ internal bool omitTf;
+
+ public bool omitTf_ForNUnitTest
+ {
+ get { return omitTf; }
+ }
+
internal bool storePayloads; // whether this field stores payloads together with term positions
- internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads)
+ internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
{
name = na;
isIndexed = tk;
@@ -50,15 +51,81 @@
this.storePositionWithTermVector = storePositionWithTermVector;
this.omitNorms = omitNorms;
this.storePayloads = storePayloads;
+ this.omitTf = omitTf;
}
- public System.Object Clone()
+ public object Clone()
{
- return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+ return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
}
+ internal void update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector,
+ bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf) // merges the given flags into this FieldInfo: a flag is only touched when the stored and incoming values disagree
+ {
+ if (this.isIndexed != isIndexed)
+ {
+ this.isIndexed = true; // once indexed, always index
+ }
+ if (this.storeTermVector != storeTermVector)
+ {
+ this.storeTermVector = true; // once vector, always vector
+ }
+ if (this.storePositionWithTermVector != storePositionWithTermVector)
+ {
+ this.storePositionWithTermVector = true; // once vector, always vector
+ }
+ if (this.storeOffsetWithTermVector != storeOffsetWithTermVector)
+ {
+ this.storeOffsetWithTermVector = true; // once vector, always vector
+ }
+ if (this.omitNorms != omitNorms)
+ {
+ this.omitNorms = false; // once norms are stored, always store
+ }
+ if (this.omitTf != omitTf)
+ {
+ this.omitTf = true; // once omitTf has been requested for this field, it stays set for life
+ }
+ if (this.storePayloads != storePayloads)
+ {
+ this.storePayloads = true; // once payloads are stored, always store
+ }
+ }
+
+ internal void update(FieldInfo other) // merges other's flags into this FieldInfo: a flag is only touched when the two values disagree
+ {
+ if (isIndexed != other.isIndexed)
+ {
+ isIndexed = true; // once indexed, always index
+ }
+ if (storeTermVector != other.storeTermVector)
+ {
+ storeTermVector = true; // once vector, always vector
+ }
+ if (storePositionWithTermVector != other.storePositionWithTermVector)
+ {
+ storePositionWithTermVector = true; // once vector, always vector
+ }
+ if (storeOffsetWithTermVector != other.storeOffsetWithTermVector)
+ {
+ storeOffsetWithTermVector = true; // once vector, always vector
+ }
+ if (omitNorms != other.omitNorms)
+ {
+ omitNorms = false; // once norms are stored, always store
+ }
+ if (omitTf != other.omitTf)
+ {
+ omitTf = true; // once omitTf has been requested for this field, it stays set for life
+ }
+ if (storePayloads != other.storePayloads)
+ {
+ storePayloads = true; // once payloads are stored, always store
+ }
+ }
+
// For testing only
- public System.String Name_ForNUnitTest
+ public string Name_ForNUnitTest
{
get { return name; }
}
@@ -68,5 +135,10 @@
{
get { return storePayloads; }
}
+
+ public bool IsIndexed_ForNUnitTest()
+ {
+ return isIndexed;
+ }
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldInfos.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs Wed Jul 29 18:04:12 2009
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-using System;
+using System.Collections.Generic;
using Document = Lucene.Net.Documents.Document;
using Fieldable = Lucene.Net.Documents.Fieldable;
@@ -27,7 +27,7 @@
{
/// <summary>Access to the Fieldable Info file that describes document fields and whether or
- /// not they are indexed. Each segment has a separate Fieldable Info file. Objects
+ /// not they are indexed. Each segment has a separate Fieldable Info file. Objects
/// of this class are thread-safe for multiple readers, but only one thread can
/// be adding documents at a time, with no other reader or writer threads
/// accessing this object.
@@ -40,10 +40,11 @@
internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x4);
internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x8);
internal const byte OMIT_NORMS = (byte) (0x10);
- internal const byte STORE_PAYLOADS = (byte) (0x20);
-
- private System.Collections.ArrayList byNumber = new System.Collections.ArrayList();
- private System.Collections.Hashtable byName = new System.Collections.Hashtable();
+ internal const byte STORE_PAYLOADS = (byte)(0x20);
+ internal const byte OMIT_TF = (byte)(0x40);
+
+ private List<FieldInfo> byNumber = new List<FieldInfo>();
+ private Dictionary<string, FieldInfo> byName = new Dictionary<string, FieldInfo>();
public FieldInfos()
{
@@ -57,7 +58,7 @@
/// <param name="name">The name of the file to open the IndexInput from in the Directory
/// </param>
/// <throws> IOException </throws>
- public FieldInfos(Directory d, System.String name)
+ public FieldInfos(Directory d, string name)
{
IndexInput input = d.OpenInput(name);
try
@@ -71,30 +72,49 @@
}
/// <summary> Returns a deep clone of this FieldInfos instance.</summary>
- public System.Object Clone()
+ public object Clone()
{
- FieldInfos fis = new FieldInfos();
- int numField = byNumber.Count;
- for (int i = 0; i < numField; i++)
- {
- FieldInfo fi = (FieldInfo) ((FieldInfo) byNumber[i]).Clone();
- fis.byNumber.Add(fi);
- fis.byName[fi.name] = fi;
- }
- return fis;
+ lock (this)
+ {
+ FieldInfos fis = new FieldInfos();
+ int numField = byNumber.Count;
+ for (int i = 0; i < numField; i++)
+ {
+ FieldInfo fi = (FieldInfo)(byNumber[i].Clone());
+ fis.byNumber.Add(fi);
+ fis.byName[fi.name] = fi;
+ }
+ return fis;
+ }
}
/// <summary>Adds field info for a Document. </summary>
public void Add(Document doc)
{
- System.Collections.IList fields = doc.GetFields();
- System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
- while (fieldIterator.MoveNext())
- {
- Fieldable field = (Fieldable) fieldIterator.Current;
- Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
- }
+ lock (this)
+ {
+ System.Collections.IList fields = doc.GetFields();
+ System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
+ while (fieldIterator.MoveNext())
+ {
+ Fieldable field = (Fieldable)fieldIterator.Current;
+ Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
+ }
+ }
}
+
+ /// <summary>
+ /// Returns true if any fields *do not* omit tf.
+ /// </summary>
+ /// <returns>true if at least one field has omitTf == false; false otherwise (including when there are no fields)</returns>
+ internal bool HasProx()
+ {
+ int numFields = byNumber.Count;
+ for (int i = 0; i < numFields; i++)
+ if (!FieldInfo(i).omitTf) // this field does not omit tf, so the answer is already known
+ return true;
+ return false;
+ }
/// <summary> Add fields that are indexed. Whether they have termvectors has to be specified.
///
@@ -109,12 +129,15 @@
/// </param>
public void AddIndexed(System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
- System.Collections.IEnumerator i = names.GetEnumerator();
- while (i.MoveNext())
- {
- System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry) i.Current;
- Add((System.String) t.Key, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
- }
+ lock (this)
+ {
+ System.Collections.IEnumerator i = names.GetEnumerator();
+ while (i.MoveNext())
+ {
+ System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry)i.Current;
+ Add((string)t.Key, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+ }
+ }
}
/// <summary> Assumes the fields are not storing term vectors.
@@ -125,15 +148,18 @@
/// <param name="isIndexed">Whether the fields are indexed or not
///
/// </param>
- /// <seealso cref="Add(String, boolean)">
+ /// <seealso cref="Add(string, bool)">
/// </seealso>
- public void Add(System.Collections.ICollection names, bool isIndexed)
+ public void Add(ICollection<string> names, bool isIndexed)
{
- System.Collections.IEnumerator i = names.GetEnumerator();
- while (i.MoveNext())
- {
- Add((System.String) i.Current, isIndexed);
- }
+ lock (this)
+ {
+ IEnumerator<string> i = names.GetEnumerator();
+ while (i.MoveNext())
+ {
+ Add(i.Current, isIndexed);
+ }
+ }
}
/// <summary> Calls 5 parameter add with false for all TermVector parameters.
@@ -143,11 +169,14 @@
/// </param>
/// <param name="isIndexed">true if the field is indexed
/// </param>
- /// <seealso cref="Add(String, boolean, boolean, boolean, boolean)">
+ /// <seealso cref="Add(string, bool, bool, bool, bool)">
/// </seealso>
- public void Add(System.String name, bool isIndexed)
+ public void Add(string name, bool isIndexed)
{
- Add(name, isIndexed, false, false, false, false);
+ lock (this)
+ {
+ Add(name, isIndexed, false, false, false, false);
+ }
}
/// <summary> Calls 5 parameter add with false for term vector positions and offsets.
@@ -159,9 +188,12 @@
/// </param>
/// <param name="storeTermVector">true if the term vector should be stored
/// </param>
- public void Add(System.String name, bool isIndexed, bool storeTermVector)
+ public void Add(string name, bool isIndexed, bool storeTermVector)
{
- Add(name, isIndexed, storeTermVector, false, false, false);
+ lock (this)
+ {
+ Add(name, isIndexed, storeTermVector, false, false, false);
+ }
}
/// <summary>If the field is not yet known, adds it. If it is known, checks to make
@@ -180,10 +212,12 @@
/// </param>
/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
/// </param>
- public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+ public void Add(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
-
- Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ lock (this)
+ {
+ Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ }
}
/// <summary>If the field is not yet known, adds it. If it is known, checks to make
@@ -204,9 +238,12 @@
/// </param>
/// <param name="omitNorms">true if the norms for the indexed field should be omitted
/// </param>
- public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
+ public void Add(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
{
- Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false);
+ lock (this)
+ {
+ Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false);
+ }
}
/// <summary>If the field is not yet known, adds it. If it is known, checks to make
@@ -229,60 +266,57 @@
/// </param>
/// <param name="storePayloads">true if payloads should be stored for this field
/// </param>
- public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads)
- {
- FieldInfo fi = FieldInfo(name);
- if (fi == null)
- {
- return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
- }
- else
- {
- if (fi.isIndexed != isIndexed)
- {
- fi.isIndexed = true; // once indexed, always index
- }
- if (fi.storeTermVector != storeTermVector)
- {
- fi.storeTermVector = true; // once vector, always vector
- }
- if (fi.storePositionWithTermVector != storePositionWithTermVector)
- {
- fi.storePositionWithTermVector = true; // once vector, always vector
- }
- if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector)
- {
- fi.storeOffsetWithTermVector = true; // once vector, always vector
- }
- if (fi.omitNorms != omitNorms)
- {
- fi.omitNorms = false; // once norms are stored, always store
- }
- if (fi.storePayloads != storePayloads)
- {
- fi.storePayloads = true;
- }
- }
- return fi;
- }
-
- private FieldInfo AddInternal(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads)
+ public FieldInfo Add(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
+ {
+ lock (this)
+ {
+ FieldInfo fi = FieldInfo(name);
+ if (fi == null)
+ {
+ return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
+ }
+ else
+ {
+ fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
+ }
+ return fi;
+ }
+ }
+
+ public FieldInfo Add(FieldInfo fieldInfo) // registers a new entry built from fieldInfo's settings, or merges them into the entry already stored under that name
+ {
+ lock (this)
+ {
+ FieldInfo fi = FieldInfo(fieldInfo.name); // null when no field with this name is known yet
+ if (fi == null)
+ {
+ return AddInternal(fieldInfo.name, fieldInfo.isIndexed, fieldInfo.storeTermVector, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector, fieldInfo.omitNorms, fieldInfo.storePayloads, fieldInfo.omitTf); // creates a fresh FieldInfo; the argument itself is not stored
+ }
+ else
+ {
+ fi.update(fieldInfo); // existing entry: reconcile its flags with the incoming ones
+ }
+ return fi;
+ }
+ }
+
+ private FieldInfo AddInternal(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
{
- FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+ FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
byNumber.Add(fi);
byName[name] = fi;
return fi;
}
- public int FieldNumber(System.String fieldName)
+ public int FieldNumber(string fieldName)
{
FieldInfo fi = FieldInfo(fieldName);
return (fi != null) ? fi.number : -1;
}
- public FieldInfo FieldInfo(System.String fieldName)
+ public FieldInfo FieldInfo(string fieldName) // returns the FieldInfo stored under fieldName, or null when the name is unknown
{
- return (FieldInfo) byName[fieldName];
+ FieldInfo fi; return byName.TryGetValue(fieldName, out fi) ? fi : null; // single dictionary lookup instead of ContainsKey + indexer
}
/// <summary> Return the fieldName identified by its number.
@@ -293,7 +327,7 @@
/// <returns> the fieldName or an empty string when the field
/// with the given number doesn't exist.
/// </returns>
- public System.String FieldName(int fieldNumber)
+ public string FieldName(int fieldNumber)
{
FieldInfo fi = FieldInfo(fieldNumber);
return (fi != null) ? fi.name : "";
@@ -307,7 +341,7 @@
/// </returns>
public FieldInfo FieldInfo(int fieldNumber)
{
- return (fieldNumber >= 0) ? (FieldInfo) byNumber[fieldNumber] : null;
+ return (fieldNumber >= 0) ? byNumber[fieldNumber] : null;
}
public int Size()
@@ -329,7 +363,7 @@
return hasVectors;
}
- public void Write(Directory d, System.String name)
+ public void Write(Directory d, string name)
{
IndexOutput output = d.CreateOutput(name);
try
@@ -361,7 +395,10 @@
bits |= OMIT_NORMS;
if (fi.storePayloads)
bits |= STORE_PAYLOADS;
- output.WriteString(fi.name);
+ if (fi.omitTf)
+ bits |= OMIT_TF;
+
+ output.WriteString(fi.name);
output.WriteByte(bits);
}
}
@@ -371,7 +408,7 @@
int size = input.ReadVInt(); //read in the size
for (int i = 0; i < size; i++)
{
- System.String name = String.Intern(input.ReadString());
+ string name = string.Intern(input.ReadString());
byte bits = input.ReadByte();
bool isIndexed = (bits & IS_INDEXED) != 0;
bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
@@ -379,9 +416,10 @@
bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
bool omitNorms = (bits & OMIT_NORMS) != 0;
bool storePayloads = (bits & STORE_PAYLOADS) != 0;
+ bool omitTf = (bits & OMIT_TF) != 0;
- AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+ AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
}
}
}
-}
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs Wed Jul 29 18:04:12 2009
@@ -27,19 +27,19 @@
public class FieldSortedTermVectorMapper : TermVectorMapper
{
private System.Collections.IDictionary fieldToTerms = new System.Collections.Hashtable();
- private System.Collections.Generic.SortedDictionary<Object, Object> currentSet;
+ private System.Collections.Generic.SortedDictionary<object, object> currentSet;
private System.String currentField;
- private System.Collections.Generic.IComparer<Object> comparator;
+ private System.Collections.Generic.IComparer<object> comparator;
/// <summary> </summary>
/// <param name="comparator">A Comparator for sorting {@link TermVectorEntry}s
/// </param>
- public FieldSortedTermVectorMapper(System.Collections.Generic.IComparer<Object> comparator) : this(false, false, comparator)
+ public FieldSortedTermVectorMapper(System.Collections.Generic.IComparer<object> comparator) : this(false, false, comparator)
{
}
- public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, System.Collections.Generic.IComparer<Object> comparator) : base(ignoringPositions, ignoringOffsets)
+ public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, System.Collections.Generic.IComparer<object> comparator) : base(ignoringPositions, ignoringOffsets)
{
this.comparator = comparator;
}
@@ -52,7 +52,8 @@
public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
{
- currentSet = new System.Collections.Generic.SortedDictionary<Object, Object>(comparator);
+ currentSet = new System.Collections.Generic.SortedDictionary<object, object>(comparator);
+
currentField = field;
fieldToTerms[field] = currentSet;
}
@@ -68,7 +69,7 @@
}
- public virtual System.Collections.Generic.IComparer<Object> GetComparator()
+ public virtual System.Collections.Generic.IComparer<object> GetComparator()
{
return comparator;
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldsReader.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs Wed Jul 29 18:04:12 2009
@@ -20,627 +20,704 @@
using Lucene.Net.Documents;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using CloseableThreadLocal = Lucene.Net.Util.CloseableThreadLocal;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using TokenStream = Lucene.Net.Analysis.TokenStream;
namespace Lucene.Net.Index
{
-
- /// <summary> Class responsible for access to stored document fields.
- /// <p/>
- /// It uses <segment>.fdt and <segment>.fdx; files.
- ///
- /// </summary>
- /// <version> $Id: FieldsReader.java 620759 2008-02-12 11:10:21Z mikemccand $
- /// </version>
- public sealed class FieldsReader
- {
- private FieldInfos fieldInfos;
-
- // The main fieldStream, used only for cloning.
- private IndexInput cloneableFieldsStream;
-
- // This is a clone of cloneableFieldsStream used for reading documents.
- // It should not be cloned outside of a synchronized context.
- private IndexInput fieldsStream;
-
- private IndexInput indexStream;
- private int numTotalDocs;
- private int size;
- private bool closed;
-
- // The docID offset where our docs begin in the index
- // file. This will be 0 if we have our own private file.
- private int docStoreOffset;
-
- private System.LocalDataStoreSlot fieldsStreamTL = System.Threading.Thread.AllocateDataSlot();
-
- public FieldsReader(Directory d, System.String segment, FieldInfos fn) : this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, - 1, 0)
- {
- }
-
- internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize) : this(d, segment, fn, readBufferSize, - 1, 0)
- {
- }
-
- internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
- {
- bool success = false;
-
- try
- {
- fieldInfos = fn;
-
- cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize);
- fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
- indexStream = d.OpenInput(segment + ".fdx", readBufferSize);
-
- if (docStoreOffset != - 1)
- {
- // We read only a slice out of this shared fields file
- this.docStoreOffset = docStoreOffset;
- this.size = size;
-
- // Verify the file is long enough to hold all of our
- // docs
- System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset);
- }
- else
- {
- this.docStoreOffset = 0;
- this.size = (int) (indexStream.Length() >> 3);
- }
-
- numTotalDocs = (int) (indexStream.Length() >> 3);
- success = true;
- }
- finally
- {
- // With lock-less commits, it's entirely possible (and
- // fine) to hit a FileNotFound exception above. In
- // this case, we want to explicitly close any subset
- // of things that were opened so that we don't have to
- // wait for a GC to do so.
- if (!success)
- {
- Close();
- }
- }
- }
-
- /// <throws> AlreadyClosedException if this FieldsReader is closed </throws>
- internal void EnsureOpen()
- {
- if (closed)
- {
- throw new AlreadyClosedException("this FieldsReader is closed");
- }
- }
-
- /// <summary> Closes the underlying {@link Lucene.Net.Store.IndexInput} streams, including any ones associated with a
- /// lazy implementation of a Field. This means that the Fields values will not be accessible.
- ///
- /// </summary>
- /// <throws> IOException </throws>
- public void Close()
- {
- if (!closed)
- {
- if (fieldsStream != null)
- {
- fieldsStream.Close();
- }
- if (cloneableFieldsStream != null)
- {
- cloneableFieldsStream.Close();
- }
- if (indexStream != null)
- {
- indexStream.Close();
- }
- IndexInput localFieldsStream = (IndexInput) System.Threading.Thread.GetData(fieldsStreamTL);
- if (localFieldsStream != null)
- {
- localFieldsStream.Close();
- System.Threading.Thread.SetData(fieldsStreamTL, null);
- }
- closed = true;
- }
- }
-
- public int Size()
- {
- return size;
- }
-
- public Document Doc(int n, FieldSelector fieldSelector)
- {
- indexStream.Seek((n + docStoreOffset) * 8L);
- long position = indexStream.ReadLong();
- fieldsStream.Seek(position);
-
- Document doc = new Document();
- int numFields = fieldsStream.ReadVInt();
- for (int i = 0; i < numFields; i++)
- {
- int fieldNumber = fieldsStream.ReadVInt();
- FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
- FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
-
- byte bits = fieldsStream.ReadByte();
- System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);
-
- bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
- bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
- bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- //TODO: Find an alternative approach here if this list continues to grow beyond the
- //list of 5 or 6 currently here. See Lucene 762 for discussion
- if (acceptField.Equals(FieldSelectorResult.LOAD))
- {
- AddField(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
- {
- AddFieldForMerge(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
- {
- AddField(doc, fi, binary, compressed, tokenize);
- break; //Get out of this loop
- }
- else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
- {
- AddFieldLazy(doc, fi, binary, compressed, tokenize);
- }
- else if (acceptField.Equals(FieldSelectorResult.SIZE))
- {
- SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
- }
- else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
- {
- AddFieldSize(doc, fi, binary, compressed);
- break;
- }
- else
- {
- SkipField(binary, compressed);
- }
- }
-
- return doc;
- }
-
- /// <summary>Returns the length in bytes of each raw document in a
- /// contiguous range of length numDocs starting with
- /// startDocID. Returns the IndexInput (the fieldStream),
- /// already seeked to the starting point for startDocID.
- /// </summary>
- internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
- {
- indexStream.Seek((docStoreOffset + startDocID) * 8L);
- long startOffset = indexStream.ReadLong();
- long lastOffset = startOffset;
- int count = 0;
- while (count < numDocs)
- {
- long offset;
- int docID = docStoreOffset + startDocID + count + 1;
- System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
- if (docID < numTotalDocs)
- offset = indexStream.ReadLong();
- else
- offset = fieldsStream.Length();
- lengths[count++] = (int) (offset - lastOffset);
- lastOffset = offset;
- }
-
- fieldsStream.Seek(startOffset);
-
- return fieldsStream;
- }
-
- /// <summary> Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
- /// This will have the most payoff on large fields.
- /// </summary>
- private void SkipField(bool binary, bool compressed)
- {
- SkipField(binary, compressed, fieldsStream.ReadVInt());
- }
-
- private void SkipField(bool binary, bool compressed, int toRead)
- {
- if (binary || compressed)
- {
- long pointer = fieldsStream.GetFilePointer();
- fieldsStream.Seek(pointer + toRead);
- }
- else
- {
- //We need to skip chars. This will slow us down, but still better
- fieldsStream.SkipChars(toRead);
- }
- }
-
- private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
- {
- if (binary == true)
- {
- int toRead = fieldsStream.ReadVInt();
- long pointer = fieldsStream.GetFilePointer();
- if (compressed)
- {
- //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
- doc.Add(new LazyField(this, fi.name, Field.Store.COMPRESS, toRead, pointer));
- }
- else
- {
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer));
- }
- //Need to move the pointer ahead by toRead positions
- fieldsStream.Seek(pointer + toRead);
- }
- else
- {
- Field.Store store = Field.Store.YES;
- Field.Index index = GetIndexType(fi, tokenize);
- Field.TermVector termVector = GetTermVectorType(fi);
-
- Fieldable f;
- if (compressed)
- {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.ReadVInt();
- long pointer = fieldsStream.GetFilePointer();
- f = new LazyField(this, fi.name, store, toRead, pointer);
- //skip over the part that we aren't loading
- fieldsStream.Seek(pointer + toRead);
- f.SetOmitNorms(fi.omitNorms);
- }
- else
- {
- int length = fieldsStream.ReadVInt();
- long pointer = fieldsStream.GetFilePointer();
- //Skip ahead of where we are by the length of what is stored
- fieldsStream.SkipChars(length);
- f = new LazyField(this, fi.name, store, index, termVector, length, pointer);
- f.SetOmitNorms(fi.omitNorms);
- }
- doc.Add(f);
- }
- }
-
- // in merge mode we don't uncompress the data of a compressed field
- private void AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
- {
- System.Object data;
-
- if (binary || compressed)
- {
- int toRead = fieldsStream.ReadVInt();
- byte[] b = new byte[toRead];
- fieldsStream.ReadBytes(b, 0, b.Length);
- data = b;
- }
- else
- {
- data = fieldsStream.ReadString();
- }
-
- doc.Add(new FieldForMerge(data, fi, binary, compressed, tokenize));
- }
-
- private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
- {
-
- //we have a binary stored field, and it may be compressed
- if (binary)
- {
- int toRead = fieldsStream.ReadVInt();
- byte[] b = new byte[toRead];
- fieldsStream.ReadBytes(b, 0, b.Length);
- if (compressed)
- doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
- else
- doc.Add(new Field(fi.name, b, Field.Store.YES));
- }
- else
- {
- Field.Store store = Field.Store.YES;
- Field.Index index = GetIndexType(fi, tokenize);
- Field.TermVector termVector = GetTermVectorType(fi);
-
- Fieldable f;
- if (compressed)
- {
- store = Field.Store.COMPRESS;
- int toRead = fieldsStream.ReadVInt();
-
- byte[] b = new byte[toRead];
- fieldsStream.ReadBytes(b, 0, b.Length);
- f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
- f.SetOmitNorms(fi.omitNorms);
- }
- else
- {
- f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
- f.SetOmitNorms(fi.omitNorms);
- }
- doc.Add(f);
- }
- }
-
- // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
- // Read just the size -- caller must skip the field content to continue reading fields
- // Return the size in bytes or chars, depending on field type
- private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
- {
- int size = fieldsStream.ReadVInt(), bytesize = binary || compressed ? size : 2 * size;
- byte[] sizebytes = new byte[4];
- sizebytes[0] = (byte) (SupportClass.Number.URShift(bytesize, 24));
- sizebytes[1] = (byte) (SupportClass.Number.URShift(bytesize, 16));
- sizebytes[2] = (byte) (SupportClass.Number.URShift(bytesize, 8));
- sizebytes[3] = (byte) bytesize;
- doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
- return size;
- }
-
- private Field.TermVector GetTermVectorType(FieldInfo fi)
- {
- Field.TermVector termVector = null;
- if (fi.storeTermVector)
- {
- if (fi.storeOffsetWithTermVector)
- {
- if (fi.storePositionWithTermVector)
- {
- termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
- }
- else
- {
- termVector = Field.TermVector.WITH_OFFSETS;
- }
- }
- else if (fi.storePositionWithTermVector)
- {
- termVector = Field.TermVector.WITH_POSITIONS;
- }
- else
- {
- termVector = Field.TermVector.YES;
- }
- }
- else
- {
- termVector = Field.TermVector.NO;
- }
- return termVector;
- }
-
- private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
- {
- Field.Index index;
- if (fi.isIndexed && tokenize)
- index = Field.Index.TOKENIZED;
- else if (fi.isIndexed && !tokenize)
- index = Field.Index.UN_TOKENIZED;
- else
- index = Field.Index.NO;
- return index;
- }
-
- /// <summary> A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
- /// loaded.
- /// </summary>
- [Serializable]
- private class LazyField:AbstractField, Fieldable
- {
- private void InitBlock(FieldsReader enclosingInstance)
- {
- this.enclosingInstance = enclosingInstance;
- }
- private FieldsReader enclosingInstance;
- public FieldsReader Enclosing_Instance
- {
- get
- {
- return enclosingInstance;
- }
-
- }
- private int toRead;
- private long pointer;
-
- public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer):base(name, store, Field.Index.NO, Field.TermVector.NO)
- {
- InitBlock(enclosingInstance);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
-
- public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer):base(name, store, index, termVector)
- {
- InitBlock(enclosingInstance);
- this.toRead = toRead;
- this.pointer = pointer;
- lazy = true;
- }
-
- private IndexInput GetFieldStream()
- {
- IndexInput localFieldsStream = (IndexInput) System.Threading.Thread.GetData(Enclosing_Instance.fieldsStreamTL);
- if (localFieldsStream == null)
- {
- localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
- System.Threading.Thread.SetData(Enclosing_Instance.fieldsStreamTL, localFieldsStream);
- }
- return localFieldsStream;
- }
-
- /// <summary>The value of the field in Binary, or null. If null, the Reader value,
- /// String value, or TokenStream value is used. Exactly one of stringValue(),
- /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
- /// </summary>
- public override byte[] BinaryValue()
- {
- Enclosing_Instance.EnsureOpen();
- if (fieldsData == null)
- {
- byte[] b = new byte[toRead];
- IndexInput localFieldsStream = GetFieldStream();
- //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people
- //since they are already handling this exception when getting the document
- try
- {
- localFieldsStream.Seek(pointer);
- localFieldsStream.ReadBytes(b, 0, b.Length);
- if (isCompressed == true)
- {
- fieldsData = Enclosing_Instance.Uncompress(b);
- }
- else
- {
- fieldsData = b;
- }
- }
- catch (System.IO.IOException e)
- {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData is byte[] ? (byte[]) fieldsData : null;
- }
-
- /// <summary>The value of the field as a Reader, or null. If null, the String value,
- /// binary value, or TokenStream value is used. Exactly one of stringValue(),
- /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
- /// </summary>
- public override System.IO.TextReader ReaderValue()
- {
- Enclosing_Instance.EnsureOpen();
- return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null;
- }
-
- /// <summary>The value of the field as a TokesStream, or null. If null, the Reader value,
- /// String value, or binary value is used. Exactly one of stringValue(),
- /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
- /// </summary>
- public override TokenStream TokenStreamValue()
- {
- Enclosing_Instance.EnsureOpen();
- return fieldsData is TokenStream ? (TokenStream) fieldsData : null;
- }
-
-
- /// <summary>The value of the field as a String, or null. If null, the Reader value,
- /// binary value, or TokenStream value is used. Exactly one of stringValue(),
- /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
- /// </summary>
- public override System.String StringValue()
- {
- Enclosing_Instance.EnsureOpen();
- if (fieldsData == null)
- {
- IndexInput localFieldsStream = GetFieldStream();
- try
- {
- localFieldsStream.Seek(pointer);
- if (isCompressed)
- {
- byte[] b = new byte[toRead];
- localFieldsStream.ReadBytes(b, 0, b.Length);
- fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
- }
- else
- {
- //read in chars b/c we already know the length we need to read
- char[] chars = new char[toRead];
- localFieldsStream.ReadChars(chars, 0, toRead);
- fieldsData = new System.String(chars);
- }
- }
- catch (System.IO.IOException e)
- {
- throw new FieldReaderException(e);
- }
- }
- return fieldsData is System.String ? (System.String) fieldsData : null;
- }
-
- public long GetPointer()
- {
- Enclosing_Instance.EnsureOpen();
- return pointer;
- }
-
- public void SetPointer(long pointer)
- {
- Enclosing_Instance.EnsureOpen();
- this.pointer = pointer;
- }
-
- public int GetToRead()
- {
- Enclosing_Instance.EnsureOpen();
- return toRead;
- }
-
- public void SetToRead(int toRead)
- {
- Enclosing_Instance.EnsureOpen();
- this.toRead = toRead;
- }
- }
-
- private byte[] Uncompress(byte[] input)
- {
- return SupportClass.CompressionSupport.Uncompress(input);
- }
-
- // Instances of this class hold field properties and data
- // for merge
- [Serializable]
- public sealed class FieldForMerge : AbstractField
- {
- public override System.String StringValue()
- {
- return (System.String) this.fieldsData;
- }
-
- public override System.IO.TextReader ReaderValue()
- {
- // not needed for merge
- return null;
- }
-
- public override byte[] BinaryValue()
- {
- return (byte[]) this.fieldsData;
- }
-
- public override TokenStream TokenStreamValue()
- {
- // not needed for merge
- return null;
- }
-
- public FieldForMerge(System.Object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
- {
- this.isStored = true;
- this.fieldsData = value_Renamed;
- this.isCompressed = compressed;
- this.isBinary = binary;
- this.isTokenized = tokenize;
-
- this.name = String.Intern(fi.name);
- this.isIndexed = fi.isIndexed;
- this.omitNorms = fi.omitNorms;
- this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
- this.storePositionWithTermVector = fi.storePositionWithTermVector;
- this.storeTermVector = fi.storeTermVector;
- }
- }
- }
+
+ /// <summary> Class responsible for access to stored document fields.
+ /// <p/>
+ /// It uses the &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
+ /// </summary>
+ public sealed class FieldsReader
+ {
+ private FieldInfos fieldInfos;
+
+ // The main fieldStream, used only for cloning.
+ private IndexInput cloneableFieldsStream;
+
+ // This is a clone of cloneableFieldsStream used for reading documents.
+ // It should not be cloned outside of a synchronized context.
+ private IndexInput fieldsStream;
+
+ private IndexInput indexStream;
+ private int numTotalDocs;
+ private int size;
+ private bool closed;
+ private readonly int format;
+ private readonly int formatSize;
+
+ // The docID offset where our docs begin in the index
+ // file. This will be 0 if we have our own private file.
+ private int docStoreOffset;
+
+ //private System.LocalDataStoreSlot fieldsStreamTL = System.Threading.Thread.AllocateDataSlot();
+ private CloseableThreadLocal fieldsStreamTL = new CloseableThreadLocal();
+
+ public FieldsReader(Directory d, System.String segment, FieldInfos fn)
+ : this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0)
+ {
+ }
+
+ internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize)
+ : this(d, segment, fn, readBufferSize, -1, 0)
+ {
+ }
+
+ /// <summary> Opens the stored-fields data file and the stored-fields index file of
+ /// <c>segment</c> in directory <c>d</c>, reads the format header from the index file and
+ /// works out how many documents this reader exposes.
+ /// </summary>
+ /// <param name="d">directory holding the segment files</param>
+ /// <param name="segment">segment name used as the file-name prefix</param>
+ /// <param name="fn">field infos used to resolve field numbers</param>
+ /// <param name="readBufferSize">buffer size handed to <c>OpenInput</c></param>
+ /// <param name="docStoreOffset">first document of this segment inside a shared
+ /// file, or -1 when the whole file belongs to this reader</param>
+ /// <param name="size">number of documents; only used when docStoreOffset is not -1</param>
+ internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
+ {
+     bool success = false;
+
+     try
+     {
+         fieldInfos = fn;
+
+         cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
+         indexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
+
+         // First version of fdx did not include a format
+         // header, but, the first int will always be 0 in that
+         // case, so the first int can be used as the format either way.
+         format = indexStream.ReadInt();
+
+         if (format > FieldsWriter.FORMAT_CURRENT)
+             throw new CorruptIndexException("Incompatible format version: " + format + " expected "
+                 + FieldsWriter.FORMAT_CURRENT + " or lower");
+
+         // Newer formats start with a 4-byte header that must be skipped
+         // when computing index entry positions.
+         if (format > FieldsWriter.FORMAT)
+             formatSize = 4;
+         else
+             formatSize = 0;
+
+         if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+             cloneableFieldsStream.SetModifiedUTF8StringsMode();
+
+         fieldsStream = (IndexInput)cloneableFieldsStream.Clone();
+
+         long indexSize = indexStream.Length() - formatSize;
+
+         if (docStoreOffset != -1)
+         {
+             // We read only a slice out of this shared fields file
+             this.docStoreOffset = docStoreOffset;
+             this.size = size;
+
+             // Verify the file is long enough to hold all of our
+             // docs
+             System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
+         }
+         else
+         {
+             this.docStoreOffset = 0;
+             this.size = (int)(indexSize >> 3);
+         }
+
+         // Each index entry is one 8-byte long.
+         numTotalDocs = (int)(indexSize >> 3);
+         success = true;
+     }
+     finally
+     {
+         // With lock-less commits, it's entirely possible (and
+         // fine) to hit a FileNotFound exception above. In
+         // this case, we want to explicitly close any subset
+         // of things that were opened so that we don't have to
+         // wait for a GC to do so.
+         if (!success)
+         {
+             Close();
+         }
+     }
+ }
+
+ /// <summary> Guard used before any read: fails once <c>Close()</c> has marked this reader closed. </summary>
+ /// <throws> AlreadyClosedException if this FieldsReader is closed </throws>
+ internal void EnsureOpen()
+ {
+ if (closed)
+ {
+ throw new AlreadyClosedException("this FieldsReader is closed");
+ }
+ }
+
+ /// <summary> Releases the document stream, the cloneable master stream, the index stream and
+ /// the per-thread stream holder. After this call the lazily loaded field values handed out
+ /// by this reader can no longer be read. Calling it again once closed does nothing.
+ /// </summary>
+ /// <throws> IOException </throws>
+ public void Close()
+ {
+     if (closed)
+     {
+         return;
+     }
+
+     // Any of the streams may still be null when the constructor failed part-way.
+     if (fieldsStream != null)
+     {
+         fieldsStream.Close();
+     }
+     if (cloneableFieldsStream != null)
+     {
+         cloneableFieldsStream.Close();
+     }
+     if (indexStream != null)
+     {
+         indexStream.Close();
+     }
+
+     fieldsStreamTL.Close();
+     closed = true;
+ }
+
+ /// <summary> Returns the number of documents visible through this reader, as computed in the constructor. </summary>
+ public int Size()
+ {
+ return size;
+ }
+
+ /// <summary> Positions the index stream on the entry of <c>docID</c>: the entries follow
+ /// <c>formatSize</c> header bytes, take 8 bytes each, and are shifted by <c>docStoreOffset</c>.
+ /// </summary>
+ private void SeekIndex(int docID)
+ {
+ // 8L keeps the multiplication in long arithmetic.
+ indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L);
+ }
+
+ /// <summary> True when the file format is at least FORMAT_VERSION_UTF8_LENGTH_IN_BYTES. </summary>
+ internal bool CanReadRawDocs()
+ {
+ return format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+ }
+
+ /// <summary> Reads the stored fields of document <c>n</c> into a new Document. For every
+ /// stored field the selector decides whether it is loaded, loaded for merge, loaded lazily,
+ /// replaced by its size, or skipped; a null selector loads every field.
+ /// </summary>
+ /// <param name="n">document number, relative to this reader</param>
+ /// <param name="fieldSelector">per-field load policy, or null to load everything</param>
+ /// <returns>the document holding the selected fields</returns>
+ public Document Doc(int n, FieldSelector fieldSelector)
+ {
+ // The index entry holds the position of the document in the fields stream.
+ SeekIndex(n);
+ long position = indexStream.ReadLong();
+ fieldsStream.Seek(position);
+
+ Document doc = new Document();
+ int numFields = fieldsStream.ReadVInt();
+ for (int i = 0; i < numFields; i++)
+ {
+ int fieldNumber = fieldsStream.ReadVInt();
+ FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
+ FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
+
+ // One flag byte per field: compressed / tokenized / binary bits.
+ byte bits = fieldsStream.ReadByte();
+ System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);
+
+ bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
+ bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
+ bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
+ //TODO: Find an alternative approach here if this list continues to grow beyond the
+ //list of 5 or 6 currently here. See Lucene 762 for discussion
+ if (acceptField.Equals(FieldSelectorResult.LOAD))
+ {
+ AddField(doc, fi, binary, compressed, tokenize);
+ }
+ else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
+ {
+ AddFieldForMerge(doc, fi, binary, compressed, tokenize);
+ }
+ else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
+ {
+ AddField(doc, fi, binary, compressed, tokenize);
+ break; //Get out of this loop
+ }
+ else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
+ {
+ AddFieldLazy(doc, fi, binary, compressed, tokenize);
+ }
+ else if (acceptField.Equals(FieldSelectorResult.SIZE))
+ {
+ // AddFieldSize only consumes the length prefix, so the content still has to be skipped.
+ SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
+ }
+ else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
+ {
+ // No skip needed: the loop ends here and nothing further is read for this document.
+ AddFieldSize(doc, fi, binary, compressed);
+ break;
+ }
+ else
+ {
+ // Any other selector result: step over the field without loading it.
+ SkipField(binary, compressed);
+ }
+ }
+
+ return doc;
+ }
+
+ /// <summary>Fills <c>lengths</c> with the size in bytes of each raw document in the
+ /// contiguous range of <c>numDocs</c> documents beginning at <c>startDocID</c>, then
+ /// returns the fields stream positioned on the first byte of <c>startDocID</c>.
+ /// </summary>
+ internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
+ {
+     SeekIndex(startDocID);
+     long firstOffset = indexStream.ReadLong();
+     long previousOffset = firstOffset;
+
+     for (int i = 0; i < numDocs; i++)
+     {
+         // A document ends where the next one starts; the last document of the
+         // file ends at the end of the fields stream.
+         int nextDocID = docStoreOffset + startDocID + i + 1;
+         System.Diagnostics.Debug.Assert(nextDocID <= numTotalDocs);
+
+         long nextOffset = nextDocID < numTotalDocs ? indexStream.ReadLong() : fieldsStream.Length();
+         lengths[i] = (int)(nextOffset - previousOffset);
+         previousOffset = nextOffset;
+     }
+
+     fieldsStream.Seek(firstOffset);
+     return fieldsStream;
+ }
+
+ /// <summary> Steps over one field without loading it: reads the length prefix from the
+ /// fields stream and then skips that much content. The saving grows with the field size.
+ /// </summary>
+ private void SkipField(bool binary, bool compressed)
+ {
+     int toRead = fieldsStream.ReadVInt();
+     SkipField(binary, compressed, toRead);
+ }
+
+ /// <summary> Skips the content of a field whose length prefix (<c>toRead</c>) has already
+ /// been consumed. Text stored in a format older than FORMAT_VERSION_UTF8_LENGTH_IN_BYTES
+ /// is skipped char by char; everything else is skipped with a direct seek.
+ /// </summary>
+ private void SkipField(bool binary, bool compressed, int toRead)
+ {
+     bool skipByChars = format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES && !binary && !compressed;
+     if (skipByChars)
+     {
+         // Slower than seeking, but still cheaper than materialising the value.
+         fieldsStream.SkipChars(toRead);
+     }
+     else
+     {
+         long here = fieldsStream.GetFilePointer();
+         fieldsStream.Seek(here + toRead);
+     }
+ }
+
+ /// <summary> Adds a LazyField for the current field: only the length prefix is read, the
+ /// position of the content is remembered, and the stream is advanced past the content.
+ /// </summary>
+ private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+ {
+     if (binary)
+     {
+         int byteCount = fieldsStream.ReadVInt();
+         long start = fieldsStream.GetFilePointer();
+         Field.Store binaryStore = compressed ? Field.Store.COMPRESS : Field.Store.YES;
+         doc.Add(new LazyField(this, fi.name, binaryStore, byteCount, start, binary));
+         // Move past the bytes that were not loaded.
+         fieldsStream.Seek(start + byteCount);
+         return;
+     }
+
+     Fieldable lazyField;
+     if (compressed)
+     {
+         int byteCount = fieldsStream.ReadVInt();
+         long start = fieldsStream.GetFilePointer();
+         lazyField = new LazyField(this, fi.name, Field.Store.COMPRESS, byteCount, start, binary);
+         // Move past the compressed bytes that were not loaded.
+         fieldsStream.Seek(start + byteCount);
+     }
+     else
+     {
+         Field.Index index = GetIndexType(fi, tokenize);
+         Field.TermVector termVector = GetTermVectorType(fi);
+
+         int length = fieldsStream.ReadVInt();
+         long start = fieldsStream.GetFilePointer();
+         // Newer formats store the length in bytes, older ones in chars.
+         if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+         {
+             fieldsStream.Seek(start + length);
+         }
+         else
+         {
+             fieldsStream.SkipChars(length);
+         }
+         lazyField = new LazyField(this, fi.name, Field.Store.YES, index, termVector, length, start, binary);
+     }
+
+     lazyField.SetOmitNorms(fi.omitNorms);
+     doc.Add(lazyField);
+ }
+
+ /// <summary> Adds the current field as a FieldForMerge. Binary and compressed fields are
+ /// kept as the raw stored bytes (compressed data is not uncompressed in merge mode);
+ /// any other field is read as a string.
+ /// </summary>
+ private void AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+ {
+     object payload;
+
+     if (!binary && !compressed)
+     {
+         payload = fieldsStream.ReadString();
+     }
+     else
+     {
+         byte[] raw = new byte[fieldsStream.ReadVInt()];
+         fieldsStream.ReadBytes(raw, 0, raw.Length);
+         payload = raw;
+     }
+
+     doc.Add(new FieldForMerge(payload, fi, binary, compressed, tokenize));
+ }
+
+ /// <summary> Fully loads the current field and adds it to <c>doc</c>. Binary fields are added
+ /// as byte arrays and text fields as strings; compressed content is uncompressed first.
+ /// </summary>
+ private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+ {
+     // Binary stored field, possibly compressed.
+     if (binary)
+     {
+         byte[] raw = new byte[fieldsStream.ReadVInt()];
+         fieldsStream.ReadBytes(raw, 0, raw.Length);
+         if (compressed)
+         {
+             doc.Add(new Field(fi.name, Uncompress(raw), Field.Store.COMPRESS));
+         }
+         else
+         {
+             doc.Add(new Field(fi.name, raw, Field.Store.YES));
+         }
+         return;
+     }
+
+     Field.Index index = GetIndexType(fi, tokenize);
+     Field.TermVector termVector = GetTermVectorType(fi);
+
+     Field.Store store;
+     System.String text;
+     if (compressed)
+     {
+         // Compressed text is stored as compressed UTF-8 bytes behind a length prefix.
+         store = Field.Store.COMPRESS;
+         byte[] packed = new byte[fieldsStream.ReadVInt()];
+         fieldsStream.ReadBytes(packed, 0, packed.Length);
+         text = System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(packed));
+     }
+     else
+     {
+         store = Field.Store.YES;
+         text = fieldsStream.ReadString();
+     }
+
+     Fieldable f = new Field(fi.name, text, store, index, termVector);
+     f.SetOmitNorms(fi.omitNorms);
+     doc.Add(f);
+ }
+
+ /// <summary> Adds, under the field's name, a 4-byte binary field holding the field's size in
+ /// bytes (high order byte first). Binary and compressed fields use the stored length as is;
+ /// other fields count 2 bytes per unit of stored length. Only the length prefix is read,
+ /// so the caller must skip the content before reading the next field.
+ /// </summary>
+ /// <returns>the length prefix exactly as read from the stream</returns>
+ private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
+ {
+     int size = fieldsStream.ReadVInt();
+     int bytesize = (binary || compressed) ? size : 2 * size;
+
+     byte[] sizebytes = new byte[]
+     {
+         (byte)(SupportClass.Number.URShift(bytesize, 24)),
+         (byte)(SupportClass.Number.URShift(bytesize, 16)),
+         (byte)(SupportClass.Number.URShift(bytesize, 8)),
+         (byte)bytesize
+     };
+
+     doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
+     return size;
+ }
+
+ /// <summary> Maps the term-vector flags of <c>fi</c> onto the matching Field.TermVector value. </summary>
+ private Field.TermVector GetTermVectorType(FieldInfo fi)
+ {
+     if (!fi.storeTermVector)
+     {
+         return Field.TermVector.NO;
+     }
+
+     bool withOffsets = fi.storeOffsetWithTermVector;
+     bool withPositions = fi.storePositionWithTermVector;
+
+     if (withOffsets)
+     {
+         return withPositions ? Field.TermVector.WITH_POSITIONS_OFFSETS : Field.TermVector.WITH_OFFSETS;
+     }
+     return withPositions ? Field.TermVector.WITH_POSITIONS : Field.TermVector.YES;
+ }
+
+ /// <summary> Picks the Field.Index value for a field: NO when it is not indexed, otherwise
+ /// ANALYZED or NOT_ANALYZED depending on <c>tokenize</c>.
+ /// </summary>
+ private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
+ {
+     if (!fi.isIndexed)
+     {
+         return Field.Index.NO;
+     }
+     return tokenize ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
+ }
+
+ /// <summary> A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
+ /// loaded.
+ /// </summary>
+ [Serializable]
+ private class LazyField : AbstractField, Fieldable
+ {
+ private void InitBlock(FieldsReader enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private FieldsReader enclosingInstance;
+ public FieldsReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ private int toRead;
+ private long pointer;
+
+ public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary)
+ : base(name, store, Field.Index.NO, Field.TermVector.NO)
+ {
+ InitBlock(enclosingInstance);
+ this.toRead = toRead;
+ this.pointer = pointer;
+ this.isBinary = isBinary;
+ if (isBinary)
+ binaryLength = toRead;
+ lazy = true;
+ }
+
+ public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary)
+ : base(name, store, index, termVector)
+ {
+ InitBlock(enclosingInstance);
+ this.toRead = toRead;
+ this.pointer = pointer;
+ this.isBinary = isBinary;
+ if (isBinary)
+ binaryLength = toRead;
+ lazy = true;
+ }
+
+ /// <summary> Returns the stream kept in the enclosing reader's <c>fieldsStreamTL</c> holder,
+ /// cloning the reader's cloneable stream and storing the clone there the first time.
+ /// </summary>
+ private IndexInput GetFieldStream()
+ {
+     IndexInput cached = (IndexInput)Enclosing_Instance.fieldsStreamTL.Get();
+     if (cached != null)
+     {
+         return cached;
+     }
+
+     IndexInput fresh = (IndexInput)Enclosing_Instance.cloneableFieldsStream.Clone();
+     Enclosing_Instance.fieldsStreamTL.Set(fresh);
+     return fresh;
+ }
+
+ /// <summary>The value of the field in Binary, or null. If null, the Reader value,
+ /// String value, or TokenStream value is used. Exactly one of stringValue(),
+ /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
+ /// </summary>
+ public override byte[] BinaryValue()
+ {
+ return GetBinaryValue(null);
+ }
+
+ /// <summary>The value of the field as a Reader, or null. If null, the String value,
+ /// binary value, or TokenStream value is used. Exactly one of stringValue(),
+ /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
+ /// </summary>
+ public override System.IO.TextReader ReaderValue()
+ {
+ Enclosing_Instance.EnsureOpen();
+ return null;
+ }
+
+ /// <summary>The value of the field as a TokenStream, or null. If null, the Reader value,
+ /// String value, or binary value is used. Exactly one of stringValue(),
+ /// readerValue(), binaryValue(), and tokenStreamValue() must be set.
+ /// </summary>
+ public override TokenStream TokenStreamValue()
+ {
+ Enclosing_Instance.EnsureOpen();
+ return null;
+ }
+
+
+ /// <summary>The value of the field as a String, or null for a binary field. The text is read
+ /// from the fields stream on the first call and kept in <c>fieldsData</c> for later calls.
+ /// </summary>
+ public override System.String StringValue()
+ {
+     Enclosing_Instance.EnsureOpen();
+     if (isBinary)
+     {
+         return null;
+     }
+     if (fieldsData != null)
+     {
+         // Already loaded by an earlier call.
+         return (string)fieldsData;
+     }
+
+     IndexInput input = GetFieldStream();
+     try
+     {
+         input.Seek(pointer);
+         if (isCompressed)
+         {
+             byte[] packed = new byte[toRead];
+             input.ReadBytes(packed, 0, packed.Length);
+             fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(packed));
+         }
+         else if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+         {
+             // toRead is a byte count in this format.
+             byte[] utf8 = new byte[toRead];
+             input.ReadBytes(utf8, 0, toRead);
+             fieldsData = System.Text.Encoding.UTF8.GetString(utf8);
+         }
+         else
+         {
+             // Older format: toRead is a char count, so read chars directly.
+             char[] chars = new char[toRead];
+             input.ReadChars(chars, 0, toRead);
+             fieldsData = new System.String(chars);
+         }
+     }
+     catch (System.IO.IOException e)
+     {
+         throw new FieldReaderException(e);
+     }
+
+     return (string)fieldsData;
+ }
+
+ public long GetPointer()
+ {
+ Enclosing_Instance.EnsureOpen();
+ return pointer;
+ }
+
+ public void SetPointer(long pointer)
+ {
+ Enclosing_Instance.EnsureOpen();
+ this.pointer = pointer;
+ }
+
+ public int GetToRead()
+ {
+ Enclosing_Instance.EnsureOpen();
+ return toRead;
+ }
+
+ public void SetToRead(int toRead)
+ {
+ Enclosing_Instance.EnsureOpen();
+ this.toRead = toRead;
+ }
+
+ /// <summary> Returns the binary value of this field, or null when the field is not binary.
+ /// The bytes are read from the fields stream on the first call and kept in
+ /// <c>fieldsData</c>; later calls return that same array.
+ /// </summary>
+ /// <param name="result">optional buffer to read into; it is used only when it is
+ /// non-null and at least <c>toRead</c> bytes long</param>
+ public override byte[] GetBinaryValue(byte[] result)
+ {
+ Enclosing_Instance.EnsureOpen();
+
+ if (isBinary)
+ {
+ if (fieldsData == null)
+ {
+ // Allocate new buffer if result is null or too small
+ byte[] b;
+ if (result == null || result.Length < toRead)
+ b = new byte[toRead];
+ else
+ b = result;
+
+ IndexInput localFieldsStream = GetFieldStream();
+
+ // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
+ // since they are already handling this exception when getting the document
+ try
+ {
+ localFieldsStream.Seek(pointer);
+ localFieldsStream.ReadBytes(b, 0, toRead);
+ if (isCompressed)
+ fieldsData = Enclosing_Instance.Uncompress(b);
+ else
+ fieldsData = b;
+ }
+ catch (System.IO.IOException e)
+ {
+ throw new FieldReaderException(e);
+ }
+
+ // NOTE(review): for a compressed field fieldsData is the uncompressed array, yet
+ // binaryLength is still set to the stored length (toRead) -- confirm that
+ // GetBinaryLength() callers expect this.
+ binaryOffset = 0;
+ binaryLength = toRead;
+ }
+
+ return (byte[])fieldsData;
+ }
+ else
+ return null;
+ }
+ }
+
+ /// <summary> Uncompresses stored field bytes by delegating to SupportClass.CompressionSupport. </summary>
+ private byte[] Uncompress(byte[] input)
+ {
+ return SupportClass.CompressionSupport.Uncompress(input);
+ }
+
+ // Instances of this class hold field properties and data
+ // for merge
+ [Serializable]
+ public sealed class FieldForMerge : AbstractField
+ {
+ // Returns the stored data cast to a string.
+ // NOTE(review): AddFieldForMerge stores a byte[] for compressed non-binary fields,
+ // in which case this cast would throw -- confirm callers check IsCompressed() first.
+ public override System.String StringValue()
+ {
+ return (System.String)this.fieldsData;
+ }
+
+ public override System.IO.TextReader ReaderValue()
+ {
+ // not needed for merge
+ return null;
+ }
+
+ // Returns the stored data cast to a byte array.
+ public override byte[] BinaryValue()
+ {
+ return (byte[])this.fieldsData;
+ }
+
+ public override TokenStream TokenStreamValue()
+ {
+ // not needed for merge
+ return null;
+ }
+
+ // Copies the raw value and the per-field flags; the indexing and term-vector
+ // settings come from the FieldInfo, the remaining flags from the arguments.
+ public FieldForMerge(object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+ {
+ this.isStored = true;
+ this.fieldsData = value_Renamed;
+ this.isCompressed = compressed;
+ this.isBinary = binary;
+ // For binary fields the value is a byte[], so its length is recorded as the binary length.
+ if (isBinary)
+ binaryLength = ((byte[])value_Renamed).Length;
+
+ this.isTokenized = tokenize;
+
+ this.name = String.Intern(fi.name);
+ this.isIndexed = fi.isIndexed;
+ this.omitNorms = fi.omitNorms;
+ this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
+ this.storePositionWithTermVector = fi.storePositionWithTermVector;
+ this.storeTermVector = fi.storeTermVector;
+ }
+ }
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldsWriter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs Wed Jul 29 18:04:12 2009
@@ -33,6 +33,17 @@
internal const byte FIELD_IS_BINARY = (byte) (0x2);
internal const byte FIELD_IS_COMPRESSED = (byte) (0x4);
+ // Original format
+ internal const int FORMAT = 0;
+
+ // Changed strings to UTF8
+ internal const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
+
+ // NOTE: if you introduce a new format, make it 1 higher
+ // than the current one, and always change this if you
+ // switch to a new format!
+ internal const int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+
private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
@@ -44,9 +55,77 @@
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
fieldInfos = fn;
- fieldsStream = d.CreateOutput(segment + ".fdt");
- indexStream = d.CreateOutput(segment + ".fdx");
- doClose = true;
+
+ // This writer owns the streams it creates. The flag is set before anything is
+ // opened because Close() does nothing while doClose is false, and the failure
+ // paths below rely on Close() to release a partially opened stream.
+ doClose = true;
+
+ // Create the fields data file and write the format header; on failure close
+ // whatever was opened and delete the partial file.
+ bool success = false;
+ string fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
+ try
+ {
+ fieldsStream = d.CreateOutput(fieldsName);
+ fieldsStream.WriteInt(FORMAT_CURRENT);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ Close();
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ try
+ {
+ d.DeleteFile(fieldsName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
+
+ // Same for the fields index file; a failure here removes both files.
+ success = false;
+ string indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
+ try
+ {
+ indexStream = d.CreateOutput(indexName);
+ indexStream.WriteInt(FORMAT_CURRENT);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ Close();
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ try
+ {
+ d.DeleteFile(fieldsName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ try
+ {
+ d.DeleteFile(indexName);
+ }
+ catch (System.Exception)
+ {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
}
internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
@@ -56,6 +135,11 @@
indexStream = fdx;
doClose = false;
}
+
+ internal void SetFieldsStream(IndexOutput stream)
+ {
+ this.fieldsStream = stream;
+ }
// Writes the contents of buffer into the fields stream
// and adds a new entry for this document into the index
@@ -67,7 +151,13 @@
fieldsStream.WriteVInt(numStoredFields);
buffer.WriteTo(fieldsStream);
}
-
+
+ // Records an entry for a document without writing any field data: the index stream
+ // gets the current position of the fields stream, and the fields stream gets a
+ // VInt of 0 (presumably the stored-field count, as in the buffer-writing method above -- confirm).
+ internal void SkipDocument()
+ {
+ indexStream.WriteLong(fieldsStream.GetFilePointer());
+ fieldsStream.WriteVInt(0);
+ }
+
internal void Flush()
{
indexStream.Flush();
@@ -76,12 +166,60 @@
internal void Close()
{
- if (doClose)
- {
- fieldsStream.Close();
- indexStream.Close();
- }
- }
+ // Closes both streams when this writer owns them (doClose). Each stream reference
+ // is cleared once a close has been attempted, so a second call does not close it again.
+ if (doClose)
+ {
+ try
+ {
+ if (fieldsStream != null)
+ {
+ try
+ {
+ fieldsStream.Close();
+ }
+ finally
+ {
+ fieldsStream = null;
+ }
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ // Closing the fields stream failed: still try to close the index stream,
+ // but report only the first failure.
+ try
+ {
+ if (indexStream != null)
+ {
+ try
+ {
+ indexStream.Close();
+ }
+ finally
+ {
+ indexStream = null;
+ }
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ // Ignore so we throw only first IOException hit
+ }
+ // Rethrow with "throw;" so the original stack trace is preserved.
+ throw;
+ }
+ finally
+ {
+ // Normal path (and non-IO failures): close the index stream if it is still open.
+ if (indexStream != null)
+ {
+ try
+ {
+ indexStream.Close();
+ }
+ finally
+ {
+ indexStream = null;
+ }
+ }
+ }
+ }
+ }
internal void WriteField(FieldInfo fi, Fieldable field)
{
@@ -103,39 +241,46 @@
if (field.IsCompressed())
{
// compression is enabled for the current field
- byte[] data = null;
+ byte[] data;
+ int len;
+ int offset;
if (disableCompression)
{
// optimized case for merging, the data
// is already compressed
- data = field.BinaryValue();
+ data = field.GetBinaryValue();
+ System.Diagnostics.Debug.Assert(data != null);
+ len = field.GetBinaryLength();
+ offset = field.GetBinaryOffset();
}
else
{
// check if it is a binary field
if (field.IsBinary())
{
- data = Compress(field.BinaryValue());
+ data = Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
}
else
{
- data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
+ byte[] x = System.Text.Encoding.UTF8.GetBytes(field.StringValue());
+ data = Compress(x, 0, x.Length);
}
+ len = data.Length;
+ offset = 0;
}
- int len = data.Length;
+
fieldsStream.WriteVInt(len);
- fieldsStream.WriteBytes(data, len);
+ fieldsStream.WriteBytes(data, offset, len);
}
else
{
// compression is disabled for the current field
if (field.IsBinary())
{
- byte[] data = field.BinaryValue();
- int len = data.Length;
- fieldsStream.WriteVInt(len);
- fieldsStream.WriteBytes(data, len);
+ int length = field.GetBinaryLength();
+ fieldsStream.WriteVInt(length);
+ fieldsStream.WriteBytes(field.BinaryValue(), field.GetBinaryOffset(), length);
}
else
{
@@ -186,9 +331,9 @@
}
}
- private byte[] Compress(byte[] input)
+ /// <summary>
+ /// Compresses a slice of a byte array by delegating to
+ /// SupportClass.CompressionSupport.Compress.
+ /// </summary>
+ /// <param name="input">array containing the bytes to compress</param>
+ /// <param name="offset">start offset of the slice within input</param>
+ /// <param name="length">number of bytes in the slice</param>
+ /// <returns>the array returned by the compression helper</returns>
+ private byte[] Compress(byte[] input, int offset, int length)
{
- return SupportClass.CompressionSupport.Compress(input);
+ return SupportClass.CompressionSupport.Compress(input, offset, length);
}
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FilterIndexReader.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs Wed Jul 29 18:04:12 2009
@@ -284,7 +284,7 @@
}
- public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
+ public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
{
EnsureOpen();
return in_Renamed.GetFieldNames(fieldNames);
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FreqProxFieldMergeState.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FreqProxFieldMergeState.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FreqProxFieldMergeState.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FreqProxFieldMergeState.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+ /// <summary>
+ /// Used by DocumentsWriter to merge the postings from
+ /// multiple ThreadStates when creating a segment
+ /// </summary>
+ internal sealed class FreqProxFieldMergeState
+ {
+     // Per-field writer whose buffered postings are iterated.
+     internal readonly FreqProxTermsWriterPerField field;
+     // Number of entries of postings that nextTerm() visits.
+     internal readonly int numPostings;
+     // Character pool that holds the text of each posting's term.
+     internal readonly CharBlockPool charPool;
+     // Array returned by field.termsHashPerField.sortPostings().
+     internal readonly RawPostingList[] postings;
+
+     // Posting selected by the most recent successful nextTerm() call.
+     private FreqProxTermsWriter.PostingList current;
+     // Character buffer and start offset of the current term's text.
+     internal char[] text;
+     internal int textOffset;
+
+     // Index of the current term in postings; -1 before the first nextTerm().
+     private int cursor = -1;
+
+     // Readers initialised by nextTerm() on stream 0 and stream 1 of the posting.
+     internal readonly ByteSliceReader freq = new ByteSliceReader();
+     internal readonly ByteSliceReader prox = new ByteSliceReader();
+
+     // Values produced by nextDoc(); termFreq is only updated when the
+     // field does not omit term frequencies.
+     internal int docID;
+     internal int termFreq;
+
+     /// <summary>
+     /// Captures the character pool, the posting count and the array
+     /// returned by sortPostings() for the given per-field writer.
+     /// </summary>
+     public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
+     {
+         this.field = field;
+         charPool = field.perThread.termsHashPerThread.charPool;
+         numPostings = field.termsHashPerField.numPostings;
+         postings = field.termsHashPerField.sortPostings();
+     }
+
+     /// <summary>
+     /// Moves to the next posting, points text/textOffset at its term,
+     /// re-initialises the readers and loads the term's first document.
+     /// </summary>
+     /// <returns>false once every posting has been visited, true otherwise</returns>
+     internal bool nextTerm()
+     {
+         if (++cursor == numPostings)
+         {
+             return false;
+         }
+
+         current = (FreqProxTermsWriter.PostingList)postings[cursor];
+         docID = 0;
+
+         // textStart encodes both the buffer index (high bits) and the
+         // offset inside that buffer (low bits).
+         text = charPool.buffers[current.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+         textOffset = current.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+         field.termsHashPerField.initReader(freq, current, 0);
+         if (!field.fieldInfo.omitTf)
+         {
+             field.termsHashPerField.initReader(prox, current, 1);
+         }
+
+         // A term is expected to have at least one document, so this
+         // should always succeed.
+         bool advanced = nextDoc();
+         System.Diagnostics.Debug.Assert(advanced);
+
+         return true;
+     }
+
+     /// <summary>
+     /// Advances docID (and termFreq, unless term frequencies are omitted)
+     /// to the next document of the current term.
+     /// </summary>
+     /// <returns>true if a document was loaded, false at the end of the term</returns>
+     public bool nextDoc()
+     {
+         if (!freq.Eof())
+         {
+             int code = freq.ReadVInt();
+             if (field.omitTf)
+             {
+                 // The code is the plain doc delta.
+                 docID += code;
+             }
+             else
+             {
+                 // Upper bits carry the doc delta; a set low bit means the
+                 // frequency is 1, otherwise it follows as its own VInt.
+                 docID += (int)((uint)code >> 1);
+                 termFreq = ((code & 1) != 0) ? 1 : freq.ReadVInt();
+             }
+
+             System.Diagnostics.Debug.Assert(docID != current.lastDocID);
+
+             return true;
+         }
+
+         // The stream is exhausted. The last document is kept on the posting
+         // itself; lastDocCode == -1 marks it as already returned.
+         if (current.lastDocCode == -1)
+         {
+             return false;
+         }
+
+         docID = current.lastDocID;
+         if (!field.omitTf)
+         {
+             termFreq = current.docFreq;
+         }
+         current.lastDocCode = -1;
+         return true;
+     }
+ }
+}