You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2008/06/25 04:52:24 UTC
svn commit: r671404 [10/10] - /incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermPositions.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs Tue Jun 24 19:52:22 2008
@@ -38,5 +38,44 @@
 		/// the first time.
 		/// </summary>
 		int NextPosition();
+		
+		/// <summary> Returns the length of the payload at the current term position.
+		/// The result is not valid until <see cref="NextPosition()"/> has been called
+		/// for the first time.
+		/// </summary>
+		/// <returns> the length of the current payload, in bytes
+		/// </returns>
+		int GetPayloadLength();
+		
+		/// <summary> Returns the payload data at the current term position.
+		/// The result is not valid until <see cref="NextPosition()"/> has been called
+		/// for the first time.
+		/// This method must not be called more than once after each call
+		/// to <see cref="NextPosition()"/>. Payloads are loaded lazily, so when
+		/// the payload data for the current position is not needed, callers can
+		/// skip this method entirely for performance reasons.
+		/// 
+		/// </summary>
+		/// <param name="data">the array into which the data of this payload is to be
+		/// stored, if it is big enough; otherwise, a new byte[] array
+		/// is allocated for this purpose. 
+		/// </param>
+		/// <param name="offset">the offset in the array into which the data of this payload
+		/// is to be stored.
+		/// </param>
+		/// <returns> a byte[] array containing the data of this payload
+		/// </returns>
+		/// <exception cref="System.IO.IOException">listed as a possible failure; the conditions are not specified here</exception>
+		byte[] GetPayload(byte[] data, int offset);
+		
+		/// <summary> Checks if a payload can be loaded at this position.
+		/// <p/>
+		/// Payloads can only be loaded once per call to 
+		/// <see cref="NextPosition()"/>.
+		/// 
+		/// </summary>
+		/// <returns> true if there is a payload available at this position that can be loaded
+		/// </returns>
+		bool IsPayloadAvailable();
 	}
 }
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntry.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorEntry.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntry.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntry.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary> Simple holder for the term vector data of a single term.</summary>
+	public class TermVectorEntry
+	{
+		private System.String field;
+		private System.String term;
+		private int frequency;
+		private TermVectorOffsetInfo[] offsets;
+		internal int[] positions;
+		
+		/// <summary> Creates an entry whose members all keep their default values.</summary>
+		public TermVectorEntry()
+		{
+		}
+		
+		/// <summary> Creates an entry that stores the given values without copying them.</summary>
+		public TermVectorEntry(System.String field, System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+		{
+			this.positions = positions;
+			this.offsets = offsets;
+			this.frequency = frequency;
+			this.term = term;
+			this.field = field;
+		}
+		
+		// Read accessors; each hands back the stored value as-is (arrays are not copied).
+		public virtual System.String GetField()
+		{
+			return this.field;
+		}
+		
+		public virtual int GetFrequency()
+		{
+			return this.frequency;
+		}
+		
+		public virtual TermVectorOffsetInfo[] GetOffsets()
+		{
+			return this.offsets;
+		}
+		
+		public virtual int[] GetPositions()
+		{
+			return this.positions;
+		}
+		
+		public virtual System.String GetTerm()
+		{
+			return this.term;
+		}
+		
+		// The setters are deliberately internal (the original note reads "Keep package local").
+		internal virtual void  SetFrequency(int frequency)
+		{
+			this.frequency = frequency;
+		}
+		
+		internal virtual void  SetOffsets(TermVectorOffsetInfo[] offsets)
+		{
+			this.offsets = offsets;
+		}
+		
+		internal virtual void  SetPositions(int[] positions)
+		{
+			this.positions = positions;
+		}
+		
+		/// <summary> Equal when both objects have the same runtime type and the same term; nothing else is compared.</summary>
+		public  override bool Equals(System.Object o)
+		{
+			if (System.Object.ReferenceEquals(this, o))
+				return true;
+			if (o == null || GetType() != o.GetType())
+				return false;
+			TermVectorEntry other = (TermVectorEntry) o;
+			if (term == null)
+				return other.term == null;
+			return term.Equals(other.term);
+		}
+		
+		/// <summary> Hash code of the term alone (0 for a null term), in line with Equals.</summary>
+		public override int GetHashCode()
+		{
+			return term == null ? 0 : term.GetHashCode();
+		}
+		
+		/// <summary> Returns a description built from the field, term and frequency.</summary>
+		public override System.String ToString()
+		{
+			return "TermVectorEntry{" + "field='" + field + '\'' + ", term='" + term + '\'' + ", frequency=" + frequency + '}';
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary> Orders <see cref="TermVectorEntry"/> instances by descending frequency;
+	/// ties are broken by ordinal (case-sensitive) comparison of the term and,
+	/// when the terms are equal as well, of the field.
+	/// </summary>
+	public class TermVectorEntryFreqSortedComparator : System.Collections.IComparer
+	{
+		/// <summary> Compares two objects; both are cast to <see cref="TermVectorEntry"/>.</summary>
+		/// <returns> negative, zero or positive: frequency (descending) first, then term, then field
+		/// </returns>
+		public virtual int Compare(System.Object object_Renamed, System.Object object1)
+		{
+			TermVectorEntry left = (TermVectorEntry) object_Renamed;
+			TermVectorEntry right = (TermVectorEntry) object1;
+			// Higher frequency sorts first, hence right minus left.
+			int byFrequency = right.GetFrequency() - left.GetFrequency();
+			if (byFrequency != 0)
+				return byFrequency;
+			
+			int byTerm = String.CompareOrdinal(left.GetTerm(), right.GetTerm());
+			if (byTerm != 0)
+				return byTerm;
+			return String.CompareOrdinal(left.GetField(), right.GetField());
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorMapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorMapper.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorMapper.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorMapper.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	
+	/// <summary> A TermVectorMapper lets callers map Term Vectors into a structure of
+	/// their own instead of the parallel array structure used by
+	/// Lucene.Net.Index.IndexReader.GetTermFreqVector(int,String).
+	/// <p/>
+	/// Making an implementation thread-safe is up to that implementation.
+	/// 
+	/// </summary>
+	public abstract class TermVectorMapper
+	{
+		
+		// Values handed back by IsIgnoringPositions() and IsIgnoringOffsets().
+		private bool ignoringPositions;
+		private bool ignoringOffsets;
+		
+		/// <summary> Creates a mapper whose two "ignoring" flags are both false.</summary>
+		protected internal TermVectorMapper() : this(false, false)
+		{
+		}
+		
+		/// <summary> Creates a mapper with explicit "ignoring" flags.</summary>
+		/// <param name="ignoringPositions">true if this mapper should tell Lucene to ignore positions even if they are stored
+		/// </param>
+		/// <param name="ignoringOffsets">same idea as ignoringPositions, applied to offsets
+		/// </param>
+		protected internal TermVectorMapper(bool ignoringPositions, bool ignoringOffsets)
+		{
+			this.ignoringOffsets = ignoringOffsets;
+			this.ignoringPositions = ignoringPositions;
+		}
+		
+		/// <summary> Tells the mapper what to expect with regard to field, number of terms, offset and position storage.
+		/// This method will be called once before retrieving the vector for a field, and
+		/// before <see cref="Map(System.String,int,TermVectorOffsetInfo[],int[])"/>.
+		/// </summary>
+		/// <param name="field">The field the vector is for
+		/// </param>
+		/// <param name="numTerms">The number of terms that need to be mapped
+		/// </param>
+		/// <param name="storeOffsets">true if the mapper should expect offset information
+		/// </param>
+		/// <param name="storePositions">true if the mapper should expect positions info
+		/// </param>
+		public abstract void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions);
+		
+		/// <summary> Maps the Term Vector information for one term into your own structure.</summary>
+		/// <param name="term">The term to add to the vector
+		/// </param>
+		/// <param name="frequency">The frequency of the term in the document
+		/// </param>
+		/// <param name="offsets">null if the offset is not specified, otherwise the offset into the field of the term
+		/// </param>
+		/// <param name="positions">null if the position is not specified, otherwise the position in the field of the term
+		/// </param>
+		public abstract void  Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions);
+		
+		/// <summary> Indicates to Lucene that even if positions are stored, this mapper is not interested in them
+		/// and they can be skipped over.  Derived classes that want to ignore positions should make this
+		/// return true.  After the parameterless constructor the value is false, so stored positions are loaded.
+		/// </summary>
+		/// <returns> the ignoringPositions value this mapper was constructed with
+		/// </returns>
+		public virtual bool IsIgnoringPositions()
+		{
+			return this.ignoringPositions;
+		}
+		
+		/// <summary> Same principle as <see cref="IsIgnoringPositions()"/>, but applied to offsets.
+		/// After the parameterless constructor the value is false.
+		/// </summary>
+		/// <returns> the ignoringOffsets value this mapper was constructed with
+		/// </returns>
+		public virtual bool IsIgnoringOffsets()
+		{
+			return this.ignoringOffsets;
+		}
+		
+		/// <summary> Passes down the index of the document whose term vector is currently being mapped,
+		/// once for each top level call to a term vector reader.
+		/// <p/>
+		/// This default implementation does nothing with the document number.  Override it if your implementation needs the number.
+		/// <p/> 
+		/// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
+		/// 
+		/// </summary>
+		/// <param name="documentNumber">index of document currently being mapped
+		/// </param>
+		public virtual void  SetDocumentNumber(int documentNumber)
+		{
+			// Intentionally empty: the document number is ignored by default.
+		}
+	}
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorOffsetInfo.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs Tue Jun 24 19:52:22 2008
@@ -20,8 +20,13 @@
 namespace Lucene.Net.Index
 {
 	
+	/// <summary> The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link Lucene.Net.Index.TermPositionVector}'s
+	/// offset information.  This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the
+	/// original content).
+	/// </summary>
 	public class TermVectorOffsetInfo
 	{
+		/// <summary> Convenience declaration when creating a {@link Lucene.Net.Index.TermPositionVector} that stores only position information.</summary>
 		public static readonly TermVectorOffsetInfo[] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0];
 		private int startOffset;
 		private int endOffset;
@@ -36,6 +41,9 @@
 			this.startOffset = startOffset;
 		}
 		
+		/// <summary> The accessor for the ending offset for the term</summary>
+		/// <returns> The offset
+		/// </returns>
 		public virtual int GetEndOffset()
 		{
 			return endOffset;
@@ -46,6 +54,11 @@
 			this.endOffset = endOffset;
 		}
 		
+		/// <summary> The accessor for the starting offset of the term.
+		/// 
+		/// </summary>
+		/// <returns> The offset
+		/// </returns>
 		public virtual int GetStartOffset()
 		{
 			return startOffset;
@@ -56,6 +69,11 @@
 			this.startOffset = startOffset;
 		}
 		
+		/// <summary> Two TermVectorOffsetInfos are equal if both the start and end offsets are the same</summary>
+		/// <param name="o">The comparison Object
+		/// </param>
+		/// <returns> true if both {@link #GetStartOffset()} and {@link #GetEndOffset()} are the same for both objects.
+		/// </returns>
 		public  override bool Equals(System.Object o)
 		{
 			if (this == o)

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsReader.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs Tue Jun 24 19:52:22 2008
@@ -16,16 +16,26 @@
  */
 
 using System;
+
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
 using Directory = Lucene.Net.Store.Directory;
 using IndexInput = Lucene.Net.Store.IndexInput;
 
 namespace Lucene.Net.Index
 {
 	
-	/// <version>  $Id: TermVectorsReader.java 472959 2006-11-09 16:21:50Z yonik $
+	/// <version>  $Id: TermVectorsReader.java 601337 2007-12-05 13:59:37Z mikemccand $
 	/// </version>
-	public class TermVectorsReader : System.ICloneable
+	class TermVectorsReader : System.ICloneable
 	{
+		
+		internal const int FORMAT_VERSION = 2;
+		//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file 
+		internal const int FORMAT_SIZE = 4;
+		
+		internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1);
+		internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x2);
+		
 		private FieldInfos fieldInfos;
 		
 		private IndexInput tvx;
@@ -33,31 +43,73 @@
 		private IndexInput tvf;
 		private int size;
 		
+		// The docID offset where our docs begin in the index
+		// file.  This will be 0 if we have our own private file.
+		private int docStoreOffset;
+		
 		private int tvdFormat;
 		private int tvfFormat;
 		
-		public TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
+		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos) : this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
 		{
-			if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
+		}
+		
+		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize) : this(d, segment, fieldInfos, readBufferSize, - 1, 0)
+		{ // Passes the caller's readBufferSize on; docStoreOffset -1 selects offset 0 with the size taken from the tvx length.
+		}
+		
+		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
+		{
+			bool success = false;
+			
+			try
+			{
+				if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
+				{
+					tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
+					CheckValidFormat(tvx);
+					tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
+					tvdFormat = CheckValidFormat(tvd);
+					tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
+					tvfFormat = CheckValidFormat(tvf);
+					if (- 1 == docStoreOffset)
+					{
+						this.docStoreOffset = 0;
+						this.size = (int) (tvx.Length() >> 3);
+					}
+					else
+					{
+						this.docStoreOffset = docStoreOffset;
+						this.size = size;
+						// Verify the file is long enough to hold all of our
+						// docs
+						System.Diagnostics.Debug.Assert(((int) (tvx.Length() / 8)) >= size + docStoreOffset);
+					}
+				}
+				
+				this.fieldInfos = fieldInfos;
+				success = true;
+			}
+			finally
 			{
-				tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION);
-				CheckValidFormat(tvx);
-				tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION);
-				tvdFormat = CheckValidFormat(tvd);
-				tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION);
-				tvfFormat = CheckValidFormat(tvf);
-				size = (int) tvx.Length() / 8;
+				// With lock-less commits, it's entirely possible (and
+				// fine) to hit a FileNotFound exception above. In
+				// this case, we want to explicitly close any subset
+				// of things that were opened so that we don't have to
+				// wait for a GC to do so.
+				if (!success)
+				{
+					Close();
+				}
 			}
-			
-			this.fieldInfos = fieldInfos;
 		}
 		
 		private int CheckValidFormat(IndexInput in_Renamed)
 		{
 			int format = in_Renamed.ReadInt();
-			if (format > TermVectorsWriter.FORMAT_VERSION)
+			if (format > FORMAT_VERSION)
 			{
-				throw new System.IO.IOException("Incompatible format version: " + format + " expected " + TermVectorsWriter.FORMAT_VERSION + " or less");
+				throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_VERSION + " or less");
 			}
 			return format;
 		}
@@ -111,26 +163,16 @@
 			return size;
 		}
 		
-		/// <summary> Retrieve the term vector for the given document and field</summary>
-		/// <param name="docNum">The document number to retrieve the vector for
-		/// </param>
-		/// <param name="field">The field within the document to retrieve
-		/// </param>
-		/// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
-		/// </returns>
-		/// <throws>  IOException if there is an error reading the term vector files </throws>
-		public virtual TermFreqVector Get(int docNum, System.String field)
+		public virtual void  Get(int docNum, System.String field, TermVectorMapper mapper)
 		{
-			// Check if no term vectors are available for this segment at all
-			int fieldNumber = fieldInfos.FieldNumber(field);
-			TermFreqVector result = null;
 			if (tvx != null)
 			{
+				int fieldNumber = fieldInfos.FieldNumber(field);
 				//We need to account for the FORMAT_SIZE at when seeking in the tvx
 				//We don't need to do this in other seeks because we already have the
 				// file pointer
 				//that was written in another file
-				tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+				tvx.Seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE);
 				//System.out.println("TVX Pointer: " + tvx.getFilePointer());
 				long position = tvx.ReadLong();
 				
@@ -144,7 +186,7 @@
 				int found = - 1;
 				for (int i = 0; i < fieldCount; i++)
 				{
-					if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
+					if (tvdFormat == FORMAT_VERSION)
 						number = tvd.ReadVInt();
 					else
 						number += tvd.ReadVInt();
@@ -162,7 +204,8 @@
 					for (int i = 0; i <= found; i++)
 						position += tvd.ReadVLong();
 					
-					result = ReadTermVector(field, position);
+					mapper.SetDocumentNumber(docNum);
+					ReadTermVector(field, position, mapper);
 				}
 				else
 				{
@@ -173,7 +216,25 @@
 			{
 				//System.out.println("No tvx file");
 			}
-			return result;
+		}
+		
+		
+		
+		/// <summary> Retrieves the term vector for the given document and field by running a
+		/// ParallelArrayTermVectorMapper through the mapper-based Get overload.</summary>
+		/// <param name="docNum">The document number to retrieve the vector for
+		/// </param>
+		/// <param name="field">The field within the document to retrieve
+		/// </param>
+		/// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
+		/// </returns>
+		/// <exception cref="System.IO.IOException">if there is an error reading the term vector files</exception>
+		internal virtual TermFreqVector Get(int docNum, System.String field)
+		{
+			// The mapper collects whatever the reader maps; MaterializeVector() turns that into the result.
+			ParallelArrayTermVectorMapper arrayMapper = new ParallelArrayTermVectorMapper();
+			this.Get(docNum, field, arrayMapper);
+			return arrayMapper.MaterializeVector();
+		}
 		
 		/// <summary> Return all term vectors stored for this document or null if the could not be read in.
@@ -184,14 +245,13 @@
 		/// <returns> All term frequency vectors
 		/// </returns>
 		/// <throws>  IOException if there is an error reading the term vector files  </throws>
-		public virtual TermFreqVector[] Get(int docNum)
+		internal virtual TermFreqVector[] Get(int docNum)
 		{
 			TermFreqVector[] result = null;
-			// Check if no term vectors are available for this segment at all
 			if (tvx != null)
 			{
 				//We need to offset by
-				tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+				tvx.Seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE);
 				long position = tvx.ReadLong();
 				
 				tvd.Seek(position);
@@ -205,7 +265,7 @@
 					
 					for (int i = 0; i < fieldCount; i++)
 					{
-						if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
+						if (tvdFormat == FORMAT_VERSION)
 							number = tvd.ReadVInt();
 						else
 							number += tvd.ReadVInt();
@@ -222,7 +282,7 @@
 						tvfPointers[i] = position;
 					}
 					
-					result = ReadTermVectors(fields, tvfPointers);
+					result = ReadTermVectors(docNum, fields, tvfPointers);
 				}
 			}
 			else
@@ -232,26 +292,87 @@
 			return result;
 		}
 		
+		/// <summary> Maps every term vector stored for the given document through the supplied mapper.
+		/// Nothing is mapped when this reader has no tvx input or the document has no vectorized fields.</summary>
+		/// <param name="docNumber">document number; docStoreOffset is added to it when seeking in tvx</param>
+		/// <param name="mapper">receives the document number first and then the terms of each field</param>
+		public virtual void  Get(int docNumber, TermVectorMapper mapper)
+		{
+			// No tvx input means no term vectors are available for this segment at all
+			if (tvx == null)
+				return ;
+			
+			// Skip the format header and apply docStoreOffset, as the other Get overloads do;
+			// without the offset the wrong tvx entry is read whenever docStoreOffset is non-zero.
+			tvx.Seek(((docNumber + docStoreOffset) * 8L) + FORMAT_SIZE);
+			long position = tvx.ReadLong();
+			
+			tvd.Seek(position);
+			int fieldCount = tvd.ReadVInt();
+			
+			// No fields are vectorized for this document
+			if (fieldCount == 0)
+				return ;
+			
+			int number = 0;
+			System.String[] fields = new System.String[fieldCount];
+			for (int i = 0; i < fieldCount; i++)
+			{
+				// FORMAT_VERSION files store the field number itself; other formats store a delta
+				if (tvdFormat == FORMAT_VERSION)
+					number = tvd.ReadVInt();
+				else
+					number += tvd.ReadVInt();
+				fields[i] = fieldInfos.FieldName(number);
+			}
+			
+			// Compute positions in the tvf file; each stored value is a delta from the previous one
+			position = 0;
+			long[] tvfPointers = new long[fieldCount];
+			for (int i = 0; i < fieldCount; i++)
+			{
+				position += tvd.ReadVLong();
+				tvfPointers[i] = position;
+			}
+			
+			mapper.SetDocumentNumber(docNumber);
+			ReadTermVectors(fields, tvfPointers, mapper);
+		}
+		
 		
-		private SegmentTermVector[] ReadTermVectors(System.String[] fields, long[] tvfPointers)
+		private SegmentTermVector[] ReadTermVectors(int docNum, System.String[] fields, long[] tvfPointers)
 		{
 			SegmentTermVector[] res = new SegmentTermVector[fields.Length];
 			for (int i = 0; i < fields.Length; i++)
 			{
-				res[i] = ReadTermVector(fields[i], tvfPointers[i]);
+				ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
+				mapper.SetDocumentNumber(docNum);
+				ReadTermVector(fields[i], tvfPointers[i], mapper);
+				res[i] = (SegmentTermVector) mapper.MaterializeVector();
 			}
 			return res;
 		}
 		
+		/// <summary> Reads the term vector of every listed field into the one supplied mapper;
+		/// tvfPointers[i] is the tvf position used for fields[i].</summary>
+		private void  ReadTermVectors(System.String[] fields, long[] tvfPointers, TermVectorMapper mapper)
+		{
+			int count = fields.Length;
+			for (int index = 0; index != count; index++)
+				ReadTermVector(fields[index], tvfPointers[index], mapper);
+		}
+		
 		/// <summary> </summary>
 		/// <param name="field">The field to read in
 		/// </param>
 		/// <param name="tvfPointer">The pointer within the tvf file where we should start reading
 		/// </param>
+		/// <param name="mapper">The mapper used to map the TermVector
+		/// </param>
 		/// <returns> The TermVector located at that position
 		/// </returns>
 		/// <throws>  IOException </throws>
-		private SegmentTermVector ReadTermVector(System.String field, long tvfPointer)
+		private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
 		{
 			
 			// Now read the data from specified position
@@ -262,16 +383,16 @@
 			//System.out.println("Num Terms: " + numTerms);
 			// If no terms - return a constant empty termvector. However, this should never occur!
 			if (numTerms == 0)
-				return new SegmentTermVector(field, null, null);
+				return ;
 			
 			bool storePositions;
 			bool storeOffsets;
 			
-			if (tvfFormat == TermVectorsWriter.FORMAT_VERSION)
+			if (tvfFormat == FORMAT_VERSION)
 			{
 				byte bits = tvf.ReadByte();
-				storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
-				storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
+				storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+				storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
 			}
 			else
 			{
@@ -279,18 +400,7 @@
 				storePositions = false;
 				storeOffsets = false;
 			}
-			
-			System.String[] terms = new System.String[numTerms];
-			int[] termFreqs = new int[numTerms];
-			
-			//  we may not need these, but declare them
-			int[][] positions = null;
-			TermVectorOffsetInfo[][] offsets = null;
-			if (storePositions)
-				positions = new int[numTerms][];
-			if (storeOffsets)
-				offsets = new TermVectorOffsetInfo[numTerms][];
-			
+			mapper.SetExpectations(field, numTerms, storeOffsets, storePositions);
 			int start = 0;
 			int deltaLength = 0;
 			int totalLength = 0;
@@ -309,56 +419,70 @@
 					buffer = new char[totalLength];
 					
 					if (start > 0)
-						// just copy if necessary
+					// just copy if necessary
 						Array.Copy(previousBuffer, 0, buffer, 0, start);
 				}
 				
 				tvf.ReadChars(buffer, start, deltaLength);
-				terms[i] = new System.String(buffer, 0, totalLength);
+				System.String term = new System.String(buffer, 0, totalLength);
 				previousBuffer = buffer;
 				int freq = tvf.ReadVInt();
-				termFreqs[i] = freq;
-				
+				int[] positions = null;
 				if (storePositions)
 				{
 					//read in the positions
-					int[] pos = new int[freq];
-					positions[i] = pos;
-					int prevPosition = 0;
-					for (int j = 0; j < freq; j++)
+					//does the mapper even care about positions?
+					if (mapper.IsIgnoringPositions() == false)
 					{
-						pos[j] = prevPosition + tvf.ReadVInt();
-						prevPosition = pos[j];
+						positions = new int[freq];
+						int prevPosition = 0;
+						for (int j = 0; j < freq; j++)
+						{
+							positions[j] = prevPosition + tvf.ReadVInt();
+							prevPosition = positions[j];
+						}
+					}
+					else
+					{
+						//we need to skip over the positions.  Since these are VInts, I don't believe there is anyway to know for sure how far to skip
+						//
+						for (int j = 0; j < freq; j++)
+						{
+							tvf.ReadVInt();
+						}
 					}
 				}
-				
+				TermVectorOffsetInfo[] offsets = null;
 				if (storeOffsets)
 				{
-					TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
-					offsets[i] = offs;
-					int prevOffset = 0;
-					for (int j = 0; j < freq; j++)
+					//does the mapper even care about offsets?
+					if (mapper.IsIgnoringOffsets() == false)
+					{
+						offsets = new TermVectorOffsetInfo[freq];
+						int prevOffset = 0;
+						for (int j = 0; j < freq; j++)
+						{
+							int startOffset = prevOffset + tvf.ReadVInt();
+							int endOffset = startOffset + tvf.ReadVInt();
+							offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
+							prevOffset = endOffset;
+						}
+					}
+					else
 					{
-						int startOffset = prevOffset + tvf.ReadVInt();
-						int endOffset = startOffset + tvf.ReadVInt();
-						offs[j] = new TermVectorOffsetInfo(startOffset, endOffset);
-						prevOffset = endOffset;
+						for (int j = 0; j < freq; j++)
+						{
+							tvf.ReadVInt();
+							tvf.ReadVInt();
+						}
 					}
 				}
+				mapper.Map(term, freq, offsets, positions);
 			}
-			
-			SegmentTermVector tv;
-			if (storePositions || storeOffsets)
-			{
-				tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
-			}
-			else
-			{
-				tv = new SegmentTermVector(field, terms, termFreqs);
-			}
-			return tv;
 		}
 		
+		
+		
 		public virtual System.Object Clone()
 		{
 			
@@ -370,7 +494,7 @@
 			{
 				clone = (TermVectorsReader) base.MemberwiseClone();
 			}
-			catch (System.Exception)
+			catch (System.Exception e)
 			{
 			}
 			
@@ -381,4 +505,66 @@
 			return clone;
 		}
 	}
+	
+	/// <summary> Models the existing parallel array structure</summary>
+	class ParallelArrayTermVectorMapper:TermVectorMapper
+	{
+		
+		private System.String[] terms;
+		private int[] termFreqs;
+		private int[][] positions;
+		private TermVectorOffsetInfo[][] offsets;
+		private int currentPosition;
+		private bool storingOffsets;
+		private bool storingPositions;
+		private System.String field;
+		
+		public override void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+		{
+			this.field = field;
+			terms = new System.String[numTerms];
+			termFreqs = new int[numTerms];
+			this.storingOffsets = storeOffsets;
+			this.storingPositions = storePositions;
+			if (storePositions)
+				this.positions = new int[numTerms][];
+			if (storeOffsets)
+				this.offsets = new TermVectorOffsetInfo[numTerms][];
+		}
+		
+		public override void  Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+		{
+			terms[currentPosition] = term;
+			termFreqs[currentPosition] = frequency;
+			if (storingOffsets)
+			{
+				this.offsets[currentPosition] = offsets;
+			}
+			if (storingPositions)
+			{
+				this.positions[currentPosition] = positions;
+			}
+			currentPosition++;
+		}
+		
+		/// <summary> Construct the vector</summary>
+		/// <returns> The {@link TermFreqVector} based on the mappings.
+		/// </returns>
+		public virtual TermFreqVector MaterializeVector()
+		{
+			SegmentTermVector tv = null;
+			if (field != null && terms != null)
+			{
+				if (storingPositions || storingOffsets)
+				{
+					tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
+				}
+				else
+				{
+					tv = new SegmentTermVector(field, terms, termFreqs);
+				}
+			}
+			return tv;
+		}
+	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsWriter.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsWriter.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
  */
 
 using System;
+
 using Directory = Lucene.Net.Store.Directory;
 using IndexOutput = Lucene.Net.Store.IndexOutput;
 using StringHelper = Lucene.Net.Util.StringHelper;
@@ -23,404 +24,189 @@
 namespace Lucene.Net.Index
 {
 	
-	/// <summary> Writer works by opening a document and then opening the fields within the document and then
-	/// writing out the vectors for each field.
-	/// 
-	/// Rough usage:
-	/// 
-	/// <CODE>
-	/// for each document
-	/// {
-	/// writer.openDocument();
-	/// for each field on the document
-	/// {
-	/// writer.openField(field);
-	/// for all of the terms
-	/// {
-	/// writer.addTerm(...)
-	/// }
-	/// writer.closeField
-	/// }
-	/// writer.closeDocument()    
-	/// }
-	/// </CODE>
-	/// 
-	/// </summary>
-	/// <version>  $Id: TermVectorsWriter.java 472959 2006-11-09 16:21:50Z yonik $
-	/// 
-	/// </version>
 	public sealed class TermVectorsWriter
 	{
-		internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1);
-		internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x2);
-		
-		internal const int FORMAT_VERSION = 2;
-		//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file 
-		internal const int FORMAT_SIZE = 4;
-		
-		internal const System.String TVX_EXTENSION = ".tvx";
-		internal const System.String TVD_EXTENSION = ".tvd";
-		internal const System.String TVF_EXTENSION = ".tvf";
 		
 		private IndexOutput tvx = null, tvd = null, tvf = null;
-		private System.Collections.ArrayList fields = null;
-		private System.Collections.ArrayList terms = null;
 		private FieldInfos fieldInfos;
 		
-		private TVField currentField = null;
-		private long currentDocPointer = - 1;
-
-		// Those three get'ers are helper for Lucene.Net only
-		public static System.String TvxExtension
-		{
-			get {   return TVX_EXTENSION;   }
-		}
-		public static System.String TvdExtension
-		{
-			get {   return TVD_EXTENSION;   }
-		}
-		public static System.String TvfExtension
-		{
-			get {   return TVF_EXTENSION;   }
-		}
-
 		public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
 		{
 			// Open files for TermVector storage
-			tvx = directory.CreateOutput(segment + TVX_EXTENSION);
-			tvx.WriteInt(FORMAT_VERSION);
-			tvd = directory.CreateOutput(segment + TVD_EXTENSION);
-			tvd.WriteInt(FORMAT_VERSION);
-			tvf = directory.CreateOutput(segment + TVF_EXTENSION);
-			tvf.WriteInt(FORMAT_VERSION);
+			tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+			tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
+			tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+			tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
+			tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+			tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);
 			
 			this.fieldInfos = fieldInfos;
-			fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size()));
-			terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
-		}
-		
-		
-		public void  OpenDocument()
-		{
-			CloseDocument();
-			currentDocPointer = tvd.GetFilePointer();
-		}
-		
-		
-		public void  CloseDocument()
-		{
-			if (IsDocumentOpen())
-			{
-				CloseField();
-				WriteDoc();
-				fields.Clear();
-				currentDocPointer = - 1;
-			}
-		}
-		
-		
-		public bool IsDocumentOpen()
-		{
-			return currentDocPointer != - 1;
-		}
-		
-		
-		/// <summary>Start processing a field. This can be followed by a number of calls to
-		/// addTerm, and a final call to closeField to indicate the end of
-		/// processing of this field. If a field was previously open, it is
-		/// closed automatically.
-		/// </summary>
-		public void  OpenField(System.String field)
-		{
-			FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
-			OpenField(fieldInfo.number, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector);
-		}
-		
-		private void  OpenField(int fieldNumber, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
-		{
-			if (!IsDocumentOpen())
-				throw new System.SystemException("Cannot open field when no document is open.");
-			CloseField();
-			currentField = new TVField(fieldNumber, storePositionWithTermVector, storeOffsetWithTermVector);
-		}
-		
-		/// <summary>Finished processing current field. This should be followed by a call to
-		/// openField before future calls to addTerm.
-		/// </summary>
-		public void  CloseField()
-		{
-			if (IsFieldOpen())
-			{
-				/* DEBUG */
-				//System.out.println("closeField()");
-				/* DEBUG */
-				
-				// save field and terms
-				WriteField();
-				fields.Add(currentField);
-				terms.Clear();
-				currentField = null;
-			}
-		}
-		
-		/// <summary>Return true if a field is currently open. </summary>
-		public bool IsFieldOpen()
-		{
-			return currentField != null;
-		}
-		
-		/// <summary>Add term to the field's term vector. Fieldable must already be open.
-		/// Terms should be added in
-		/// increasing order of terms, one call per unique termNum. ProxPointer
-		/// is a pointer into the TermPosition file (prx). Freq is the number of
-		/// times this term appears in this field, in this document.
-		/// </summary>
-		/// <throws>  IllegalStateException if document or field is not open </throws>
-		public void  AddTerm(System.String termText, int freq)
-		{
-			AddTerm(termText, freq, null, null);
-		}
-		
-		public void  AddTerm(System.String termText, int freq, int[] positions, TermVectorOffsetInfo[] offsets)
-		{
-			if (!IsDocumentOpen())
-				throw new System.SystemException("Cannot add terms when document is not open");
-			if (!IsFieldOpen())
-				throw new System.SystemException("Cannot add terms when field is not open");
-			
-			AddTermInternal(termText, freq, positions, offsets);
-		}
-		
-		private void  AddTermInternal(System.String termText, int freq, int[] positions, TermVectorOffsetInfo[] offsets)
-		{
-			TVTerm term = new TVTerm();
-			term.termText = termText;
-			term.freq = freq;
-			term.positions = positions;
-			term.offsets = offsets;
-			terms.Add(term);
 		}
 		
 		/// <summary> Add a complete document specified by all its term vectors. If document has no
 		/// term vectors, add value for tvx.
 		/// 
 		/// </summary>
-		/// <param name="">vectors
+		/// <param name="vectors">the term vectors of the document, or null if the document has none
 		/// </param>
 		/// <throws>  IOException </throws>
 		public void  AddAllDocVectors(TermFreqVector[] vectors)
 		{
-			OpenDocument();
+			
+			tvx.WriteLong(tvd.GetFilePointer());
 			
 			if (vectors != null)
 			{
-				for (int i = 0; i < vectors.Length; i++)
+				int numFields = vectors.Length;
+				tvd.WriteVInt(numFields);
+				
+				long[] fieldPointers = new long[numFields];
+				
+				for (int i = 0; i < numFields; i++)
 				{
-					bool storePositionWithTermVector = false;
-					bool storeOffsetWithTermVector = false;
+					fieldPointers[i] = tvf.GetFilePointer();
+					
+					int fieldNumber = fieldInfos.FieldNumber(vectors[i].GetField());
+					
+					// 1st pass: write field numbers to tvd
+					tvd.WriteVInt(fieldNumber);
+					
+					int numTerms = vectors[i].Size();
+					tvf.WriteVInt(numTerms);
+					
+					TermPositionVector tpVector;
+					
+					byte bits;
+					bool storePositions;
+					bool storeOffsets;
 					
 					if (vectors[i] is TermPositionVector)
 					{
-						
-						TermPositionVector tpVector = (TermPositionVector) vectors[i];
-						
-						if (tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null)
-							storePositionWithTermVector = true;
-						if (tpVector.Size() > 0 && tpVector.GetOffsets(0) != null)
-							storeOffsetWithTermVector = true;
-						
-						FieldInfo fieldInfo = fieldInfos.FieldInfo(tpVector.GetField());
-						OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
-						
-						for (int j = 0; j < tpVector.Size(); j++)
-							AddTermInternal(tpVector.GetTerms()[j], tpVector.GetTermFrequencies()[j], tpVector.GetTermPositions(j), tpVector.GetOffsets(j));
-						
-						CloseField();
+						// May have positions & offsets
+						tpVector = (TermPositionVector) vectors[i];
+						storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null;
+						storeOffsets = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null;
+						bits = (byte) ((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte) 0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte) 0));
 					}
 					else
 					{
-						
-						TermFreqVector tfVector = vectors[i];
-						
-						FieldInfo fieldInfo = fieldInfos.FieldInfo(tfVector.GetField());
-						OpenField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
-						
-						for (int j = 0; j < tfVector.Size(); j++)
-							AddTermInternal(tfVector.GetTerms()[j], tfVector.GetTermFrequencies()[j], null, null);
-						
-						CloseField();
-					}
-				}
-			}
-			
-			CloseDocument();
-		}
-		
-		/// <summary>Close all streams. </summary>
-		public void  Close()
-		{
-			try
-			{
-				CloseDocument();
-			}
-			finally
-			{
-				// make an effort to close all streams we can but remember and re-throw
-				// the first exception encountered in this process
-				System.IO.IOException keep = null;
-				if (tvx != null)
-					try
-					{
-						tvx.Close();
-					}
-					catch (System.IO.IOException e)
-					{
-						if (keep == null)
-							keep = e;
-					}
-				if (tvd != null)
-					try
-					{
-						tvd.Close();
+						tpVector = null;
+						bits = 0;
+						storePositions = false;
+						storeOffsets = false;
 					}
-					catch (System.IO.IOException e)
-					{
-						if (keep == null)
-							keep = e;
-					}
-				if (tvf != null)
-					try
-					{
-						tvf.Close();
-					}
-					catch (System.IO.IOException e)
+					
+					tvf.WriteVInt(bits);
+					
+					System.String[] terms = vectors[i].GetTerms();
+					int[] freqs = vectors[i].GetTermFrequencies();
+					
+					System.String lastTermText = "";
+					for (int j = 0; j < numTerms; j++)
 					{
-						if (keep == null)
-							keep = e;
+						System.String termText = terms[j];
+						int start = StringHelper.StringDifference(lastTermText, termText);
+						int length = termText.Length - start;
+						tvf.WriteVInt(start); // write shared prefix length
+						tvf.WriteVInt(length); // write delta length
+						tvf.WriteChars(termText, start, length); // write delta chars
+						lastTermText = termText;
+						
+						int termFreq = freqs[j];
+						
+						tvf.WriteVInt(termFreq);
+						
+						if (storePositions)
+						{
+							int[] positions = tpVector.GetTermPositions(j);
+							if (positions == null)
+								throw new System.SystemException("Trying to write positions that are null!");
+							System.Diagnostics.Debug.Assert(positions.Length == termFreq);
+							
+							// use delta encoding for positions
+							int lastPosition = 0;
+							for (int k = 0; k < positions.Length; k++)
+							{
+								int position = positions[k];
+								tvf.WriteVInt(position - lastPosition);
+								lastPosition = position;
+							}
+						}
+						
+						if (storeOffsets)
+						{
+							TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
+							if (offsets == null)
+								throw new System.SystemException("Trying to write offsets that are null!");
+							System.Diagnostics.Debug.Assert(offsets.Length == termFreq);
+							
+							// use delta encoding for offsets
+							int lastEndOffset = 0;
+							for (int k = 0; k < offsets.Length; k++)
+							{
+								int startOffset = offsets[k].GetStartOffset();
+								int endOffset = offsets[k].GetEndOffset();
+								tvf.WriteVInt(startOffset - lastEndOffset);
+								tvf.WriteVInt(endOffset - startOffset);
+								lastEndOffset = endOffset;
+							}
+						}
 					}
-				if (keep != null)
+				}
+				
+				// 2nd pass: write field pointers to tvd
+				long lastFieldPointer = 0;
+				for (int i = 0; i < numFields; i++)
 				{
-					throw new System.IO.IOException(keep.StackTrace);
+					long fieldPointer = fieldPointers[i];
+					tvd.WriteVLong(fieldPointer - lastFieldPointer);
+					lastFieldPointer = fieldPointer;
 				}
 			}
+			else
+				tvd.WriteVInt(0);
 		}
 		
-		
-		
-		private void  WriteField()
+		/// <summary>Close all streams. </summary>
+		internal void  Close()
 		{
-			// remember where this field is written
-			currentField.tvfPointer = tvf.GetFilePointer();
-			//System.out.println("Fieldable Pointer: " + currentField.tvfPointer);
-			
-			int size = terms.Count;
-			tvf.WriteVInt(size);
-			
-			bool storePositions = currentField.storePositions;
-			bool storeOffsets = currentField.storeOffsets;
-			byte bits = (byte) (0x0);
-			if (storePositions)
-				bits |= STORE_POSITIONS_WITH_TERMVECTOR;
-			if (storeOffsets)
-				bits |= STORE_OFFSET_WITH_TERMVECTOR;
-			tvf.WriteByte(bits);
-			
-			System.String lastTermText = "";
-			for (int i = 0; i < size; i++)
-			{
-				TVTerm term = (TVTerm) terms[i];
-				int start = StringHelper.StringDifference(lastTermText, term.termText);
-				int length = term.termText.Length - start;
-				tvf.WriteVInt(start); // write shared prefix length
-				tvf.WriteVInt(length); // write delta length
-				tvf.WriteChars(term.termText, start, length); // write delta chars
-				tvf.WriteVInt(term.freq);
-				lastTermText = term.termText;
-				
-				if (storePositions)
+			// make an effort to close all streams we can but remember and re-throw
+			// the first exception encountered in this process
+			System.IO.IOException keep = null;
+			if (tvx != null)
+				try
 				{
-					if (term.positions == null)
-						throw new System.SystemException("Trying to write positions that are null!");
-					
-					// use delta encoding for positions
-					int position = 0;
-					for (int j = 0; j < term.freq; j++)
-					{
-						tvf.WriteVInt(term.positions[j] - position);
-						position = term.positions[j];
-					}
+					tvx.Close();
 				}
-				
-				if (storeOffsets)
+				catch (System.IO.IOException e)
 				{
-					if (term.offsets == null)
-						throw new System.SystemException("Trying to write offsets that are null!");
-					
-					// use delta encoding for offsets
-					int position = 0;
-					for (int j = 0; j < term.freq; j++)
-					{
-						tvf.WriteVInt(term.offsets[j].GetStartOffset() - position);
-						tvf.WriteVInt(term.offsets[j].GetEndOffset() - term.offsets[j].GetStartOffset()); //Save the diff between the two.
-						position = term.offsets[j].GetEndOffset();
-					}
+					if (keep == null)
+						keep = e;
 				}
-			}
-		}
-		
-		private void  WriteDoc()
-		{
-			if (IsFieldOpen())
-				throw new System.SystemException("Field is still open while writing document");
-			//System.out.println("Writing doc pointer: " + currentDocPointer);
-			// write document index record
-			tvx.WriteLong(currentDocPointer);
-			
-			// write document data record
-			int size = fields.Count;
-			
-			// write the number of fields
-			tvd.WriteVInt(size);
-			
-			// write field numbers
-			for (int i = 0; i < size; i++)
-			{
-				TVField field = (TVField) fields[i];
-				tvd.WriteVInt(field.number);
-			}
-			
-			// write field pointers
-			long lastFieldPointer = 0;
-			for (int i = 0; i < size; i++)
-			{
-				TVField field = (TVField) fields[i];
-				tvd.WriteVLong(field.tvfPointer - lastFieldPointer);
-				lastFieldPointer = field.tvfPointer;
-			}
-			//System.out.println("After writing doc pointer: " + tvx.getFilePointer());
-		}
-		
-		
-		private class TVField
-		{
-			internal int number;
-			internal long tvfPointer = 0;
-			internal bool storePositions = false;
-			internal bool storeOffsets = false;
-			internal TVField(int number, bool storePos, bool storeOff)
+			if (tvd != null)
+				try
+				{
+					tvd.Close();
+				}
+				catch (System.IO.IOException e)
+				{
+					if (keep == null)
+						keep = e;
+				}
+			if (tvf != null)
+				try
+				{
+					tvf.Close();
+				}
+				catch (System.IO.IOException e)
+				{
+					if (keep == null)
+						keep = e;
+				}
+			if (keep != null)
 			{
-				this.number = number;
-				storePositions = storePos;
-				storeOffsets = storeOff;
+				throw new System.IO.IOException(keep.StackTrace);
 			}
 		}
-		
-		private class TVTerm
-		{
-			internal System.String termText;
-			internal int freq = 0;
-			internal int[] positions = null;
-			internal TermVectorOffsetInfo[] offsets = null;
-		}
 	}
 }
\ No newline at end of file