You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2012/04/07 01:37:54 UTC
svn commit: r1310635 [2/8] - in /incubator/lucene.net/trunk:
build/vs2010/contrib/ build/vs2010/test/ src/contrib/FastVectorHighlighter/
src/contrib/Highlighter/ src/contrib/Memory/ src/contrib/Memory/Properties/
src/contrib/Queries/ src/contrib/Querie...
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/Highlighter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/Highlighter.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/Highlighter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/Highlighter.cs Fri Apr 6 23:37:48 2012
@@ -16,511 +16,462 @@
*/
using System;
-using Lucene.Net.Search.Highlight;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Util;
-using Analyzer = Lucene.Net.Analysis.Analyzer;
-using Token = Lucene.Net.Analysis.Token;
-using TokenStream = Lucene.Net.Analysis.TokenStream;
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
- /// <summary> Class used to markup highlighted terms found in the best sections of a
- /// text, using configurable <see cref="IFragmenter"/>, <see cref="IScorer"/>, <see cref="IFormatter"/>,
- /// <see cref="IEncoder"/> and tokenizers.
- /// </summary>
- /// <author> mark@searcharea.co.uk
- /// </author>
- public class Highlighter
- {
-
- public const int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE = 50 * 1024;
- private int maxDocBytesToAnalyze = DEFAULT_MAX_DOC_BYTES_TO_ANALYZE;
- private IFormatter formatter;
- private IEncoder encoder;
- private IFragmenter textFragmenter = new SimpleFragmenter();
- private IScorer fragmentScorer = null;
-
- public Highlighter(IScorer fragmentScorer) : this(new SimpleHTMLFormatter(), fragmentScorer)
- {
- }
-
-
- public Highlighter(IFormatter formatter, IScorer fragmentScorer) : this(formatter, new DefaultEncoder(), fragmentScorer)
- {
- }
-
-
- public Highlighter(IFormatter formatter, IEncoder encoder, IScorer fragmentScorer)
- {
- this.formatter = formatter;
- this.encoder = encoder;
- this.fragmentScorer = fragmentScorer;
- }
-
- /// <summary> Highlights chosen terms in a text, extracting the most relevant section.
- /// This is a convenience method that calls
- /// <see cref="GetBestFragment(TokenStream, String)"/>
- ///
- /// </summary>
- /// <param name="analyzer"> the analyzer that will be used to split <c>text</c>
- /// into chunks
- /// </param>
- /// <param name="text">text to highlight terms in
- /// </param>
- /// <param name="fieldName">Name of field used to influence analyzer's tokenization policy
- ///
- /// </param>
- /// <returns> highlighted text fragment or null if no terms found
- /// </returns>
- public System.String GetBestFragment(Analyzer analyzer, System.String fieldName, System.String text)
- {
- TokenStream tokenStream = analyzer.TokenStream(fieldName, new System.IO.StringReader(text));
- return GetBestFragment(tokenStream, text);
- }
-
- /// <summary> Highlights chosen terms in a text, extracting the most relevant section.
- /// The document text is analysed in chunks to record hit statistics
- /// across the document. After accumulating stats, the fragment with the highest score
- /// is returned
- ///
- /// </summary>
- /// <param name="tokenStream"> a stream of tokens identified in the text parameter, including offset information.
- /// This is typically produced by an analyzer re-parsing a document's
- /// text. Some work may be done on retrieving TokenStreams more efficently
- /// by adding support for storing original text position data in the Lucene
- /// index but this support is not currently available (as of Lucene 1.4 rc2).
- /// </param>
- /// <param name="text">text to highlight terms in
- ///
- /// </param>
- /// <returns> highlighted text fragment or null if no terms found
- /// </returns>
- public System.String GetBestFragment(TokenStream tokenStream, System.String text)
- {
- System.String[] results = GetBestFragments(tokenStream, text, 1);
- if (results.Length > 0)
- {
- return results[0];
- }
- return null;
- }
-
- /// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
- /// This is a convenience method that calls
- /// <see cref="GetBestFragments(TokenStream, String, int)"/>
- ///
- /// </summary>
- /// <param name="analyzer"> the analyzer that will be used to split <c>text</c>
- /// into chunks
- /// </param>
- /// <param name="text"> text to highlight terms in
- /// </param>
- /// <param name="maxNumFragments"> the maximum number of fragments.
- /// </param>
- /// <deprecated> This method incorrectly hardcodes the choice of fieldname. Use the
- /// method of the same name that takes a fieldname.
- /// </deprecated>
- /// <returns> highlighted text fragments (between 0 and maxNumFragments number of fragments)
- /// </returns>
- public System.String[] GetBestFragments(Analyzer analyzer, System.String text, int maxNumFragments)
- {
- TokenStream tokenStream = analyzer.TokenStream("field", new System.IO.StringReader(text));
- return GetBestFragments(tokenStream, text, maxNumFragments);
- }
- /// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
- /// This is a convenience method that calls
- /// <see cref="GetBestFragments(TokenStream, String, int)"/>
- ///
- /// </summary>
- /// <param name="analyzer"> the analyzer that will be used to split <c>text</c>
- /// into chunks
- /// </param>
- /// <param name="fieldName"> the name of the field being highlighted (used by analyzer)
- /// </param>
- /// <param name="text"> text to highlight terms in
- /// </param>
- /// <param name="maxNumFragments"> the maximum number of fragments.
- ///
- /// </param>
- /// <returns> highlighted text fragments (between 0 and maxNumFragments number of fragments)
- /// </returns>
- public System.String[] GetBestFragments(Analyzer analyzer, System.String fieldName, System.String text, int maxNumFragments)
- {
- TokenStream tokenStream = analyzer.TokenStream(fieldName, new System.IO.StringReader(text));
- return GetBestFragments(tokenStream, text, maxNumFragments);
- }
-
- /// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
- /// The document text is analysed in chunks to record hit statistics
- /// across the document. After accumulating stats, the fragments with the highest scores
- /// are returned as an array of strings in order of score (contiguous fragments are merged into
- /// one in their original order to improve readability)
- ///
- /// </summary>
- /// <param name="text"> text to highlight terms in
- /// </param>
- /// <param name="maxNumFragments"> the maximum number of fragments.
- ///
- /// </param>
- /// <returns> highlighted text fragments (between 0 and maxNumFragments number of fragments)
- /// </returns>
- public System.String[] GetBestFragments(TokenStream tokenStream, System.String text, int maxNumFragments)
- {
- maxNumFragments = System.Math.Max(1, maxNumFragments); //sanity check
-
- TextFragment[] frag = GetBestTextFragments(tokenStream, text, true, maxNumFragments);
-
- //Get text
- System.Collections.ArrayList fragTexts = new System.Collections.ArrayList();
- for (int i = 0; i < frag.Length; i++)
- {
- if ((frag[i] != null) && (frag[i].GetScore() > 0))
- {
- fragTexts.Add(frag[i].ToString());
- }
- }
- return (System.String[]) fragTexts.ToArray(typeof(System.String));
- }
-
-
- /// <summary> Low level api to get the most relevant (formatted) sections of the document.
- /// This method has been made public to allow visibility of score information held in TextFragment objects.
- /// Thanks to Jason Calabrese for help in redefining the interface.
- /// </summary>
+ /// <summary>
+ /// Class used to markup highlighted terms found in the best sections of a
+    /// text, using configurable <see cref="IFragmenter"/>, <see cref="IScorer"/>, <see cref="IFormatter"/>,
+ /// <see cref="IEncoder"/> and tokenizers.
+ /// </summary>
+ public class Highlighter
+ {
+ public static readonly int DEFAULT_MAX_CHARS_TO_ANALYZE = 50*1024;
+
+ private int _maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE;
+ private IFormatter _formatter;
+ private IEncoder _encoder;
+ private IFragmenter _textFragmenter = new SimpleFragmenter();
+ private IScorer _fragmentScorer = null;
+
+ public Highlighter(IScorer fragmentScorer)
+ : this(new SimpleHTMLFormatter(), fragmentScorer)
+ {
+ }
+
+
+ public Highlighter(IFormatter formatter, IScorer fragmentScorer)
+ : this(formatter, new DefaultEncoder(), fragmentScorer)
+ {
+ }
+
+
+ public Highlighter(IFormatter formatter, IEncoder encoder, IScorer fragmentScorer)
+ {
+ _formatter = formatter;
+ _encoder = encoder;
+ _fragmentScorer = fragmentScorer;
+ }
+
+ /// <summary>
+ /// Highlights chosen terms in a text, extracting the most relevant section.
+ /// This is a convenience method that calls <see cref="GetBestFragment(TokenStream, string)"/>
+ /// </summary>
+ /// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
+ /// <param name="fieldName">Name of field used to influence analyzer's tokenization policy</param>
+ /// <param name="text">text to highlight terms in</param>
+ /// <returns>highlighted text fragment or null if no terms found</returns>
+ /// <exception cref="InvalidTokenOffsetsException">thrown if any token's endOffset exceeds the provided text's length</exception>
+ public String GetBestFragment(Analyzer analyzer, String fieldName, String text)
+ {
+ TokenStream tokenStream = analyzer.TokenStream(fieldName, new StringReader(text));
+ return GetBestFragment(tokenStream, text);
+ }
+
+ /// <summary>
+ /// Highlights chosen terms in a text, extracting the most relevant section.
+ /// The document text is analysed in chunks to record hit statistics
+ /// across the document. After accumulating stats, the fragment with the highest score
+ /// is returned
+ /// </summary>
/// <param name="tokenStream">
- /// </param>
- /// <param name="text">
- /// </param>
- /// <param name="maxNumFragments">
- /// </param>
- /// <param name="mergeContiguousFragments">
- /// </param>
- /// <throws> IOException </throws>
- public TextFragment[] GetBestTextFragments(TokenStream tokenStream, System.String text, bool mergeContiguousFragments, int maxNumFragments)
- {
- //System.Collections.ArrayList docFrags = new System.Collections.ArrayList();
- //System.Text.StringBuilder newText = new System.Text.StringBuilder();
-
- //TextFragment currentFrag = new TextFragment(newText, newText.Length, docFrags.Count);
- //fragmentScorer.StartFragment(currentFrag);
- //docFrags.Add(currentFrag);
-
- //FragmentQueue fragQueue = new FragmentQueue(maxNumFragments);
-
- //try
- //{
- // Lucene.Net.Analysis.Token token;
- // System.String tokenText;
- // int startOffset;
- // int endOffset;
- // int lastEndOffset = 0;
- // textFragmenter.Start(text);
-
- // TokenGroup tokenGroup = new TokenGroup();
- // token = tokenStream.Next();
- // while ((token != null) && (token.StartOffset() < maxDocBytesToAnalyze))
- // {
- // if ((tokenGroup.numTokens > 0) && (tokenGroup.IsDistinct(token)))
- // {
- // //the current token is distinct from previous tokens -
- // // markup the cached token group info
- // startOffset = tokenGroup.matchStartOffset;
- // endOffset = tokenGroup.matchEndOffset;
- // tokenText = text.Substring(startOffset, (endOffset) - (startOffset));
- // System.String markedUpText = formatter.HighlightTerm(encoder.EncodeText(tokenText), tokenGroup);
- // //store any whitespace etc from between this and last group
- // if (startOffset > lastEndOffset)
- // newText.Append(encoder.EncodeText(text.Substring(lastEndOffset, (startOffset) - (lastEndOffset))));
- // newText.Append(markedUpText);
- // lastEndOffset = System.Math.Max(endOffset, lastEndOffset);
- // tokenGroup.Clear();
-
- // //check if current token marks the start of a new fragment
- // if (textFragmenter.IsNewFragment(token))
- // {
- // currentFrag.SetScore(fragmentScorer.GetFragmentScore());
- // //record stats for a new fragment
- // currentFrag.textEndPos = newText.Length;
- // currentFrag = new TextFragment(newText, newText.Length, docFrags.Count);
- // fragmentScorer.StartFragment(currentFrag);
- // docFrags.Add(currentFrag);
- // }
- // }
-
- // tokenGroup.AddToken(token, fragmentScorer.GetTokenScore(token));
-
- // // if(lastEndOffset>maxDocBytesToAnalyze)
- // // {
- // // break;
- // // }
- // token = tokenStream.Next();
- // }
- // currentFrag.SetScore(fragmentScorer.GetFragmentScore());
-
- // if (tokenGroup.numTokens > 0)
- // {
- // //flush the accumulated text (same code as in above loop)
- // startOffset = tokenGroup.matchStartOffset;
- // endOffset = tokenGroup.matchEndOffset;
- // tokenText = text.Substring(startOffset, (endOffset) - (startOffset));
- // System.String markedUpText = formatter.HighlightTerm(encoder.EncodeText(tokenText), tokenGroup);
- // //store any whitespace etc from between this and last group
- // if (startOffset > lastEndOffset)
- // newText.Append(encoder.EncodeText(text.Substring(lastEndOffset, (startOffset) - (lastEndOffset))));
- // newText.Append(markedUpText);
- // lastEndOffset = System.Math.Max(lastEndOffset, endOffset);
- // }
-
- // //Test what remains of the original text beyond the point where we stopped analyzing
- // if ((lastEndOffset < text.Length) && (text.Length < maxDocBytesToAnalyze))
- // {
- // //append it to the last fragment
- // newText.Append(encoder.EncodeText(text.Substring(lastEndOffset)));
- // }
-
- // currentFrag.textEndPos = newText.Length;
-
- // //sort the most relevant sections of the text
- // for (System.Collections.IEnumerator i = docFrags.GetEnumerator(); i.MoveNext(); )
- // {
- // currentFrag = (TextFragment) i.Current;
-
- // //If you are running with a version of Lucene before 11th Sept 03
- // // you do not have PriorityQueue.insert() - so uncomment the code below
- // /*
- // if (currentFrag.getScore() >= minScore)
- // {
- // fragQueue.put(currentFrag);
- // if (fragQueue.size() > maxNumFragments)
- // { // if hit queue overfull
- // fragQueue.pop(); // remove lowest in hit queue
- // minScore = ((TextFragment) fragQueue.top()).getScore(); // reset minScore
- // }
-
-
- // }
- // */
- // //The above code caused a problem as a result of Christoph Goller's 11th Sept 03
- // //fix to PriorityQueue. The correct method to use here is the new "insert" method
- // // USE ABOVE CODE IF THIS DOES NOT COMPILE!
- // fragQueue.Insert(currentFrag);
- // }
-
- // //return the most relevant fragments
- // TextFragment[] frag = new TextFragment[fragQueue.Size()];
- // for (int i = frag.Length - 1; i >= 0; i--)
- // {
- // frag[i] = (TextFragment) fragQueue.Pop();
- // }
-
- // //merge any contiguous fragments to improve readability
- // if (mergeContiguousFragments)
- // {
- // MergeContiguousFragments(frag);
- // System.Collections.ArrayList fragTexts = new System.Collections.ArrayList();
- // for (int i = 0; i < frag.Length; i++)
- // {
- // if ((frag[i] != null) && (frag[i].GetScore() > 0))
- // {
- // fragTexts.Add(frag[i]);
- // }
- // }
- // frag = (TextFragment[]) fragTexts.ToArray(typeof(TextFragment));
- // }
-
- // return frag;
- //}
- //finally
- //{
- // if (tokenStream != null)
- // {
- // try
- // {
- // tokenStream.Close();
- // }
- // catch (System.Exception e)
- // {
- // }
- // }
- //}
- throw new NotImplementedException("Not yet ported to 3.0.3");
- }
-
-
- /// <summary>Improves readability of a score-sorted list of TextFragments by merging any fragments
- /// that were contiguous in the original text into one larger fragment with the correct order.
- /// This will leave a "null" in the array entry for the lesser scored fragment.
- ///
- /// </summary>
- /// <param name="frag">An array of document fragments in descending score
- /// </param>
- private void MergeContiguousFragments(TextFragment[] frag)
- {
- bool mergingStillBeingDone;
- if (frag.Length > 1)
- do
- {
- mergingStillBeingDone = false; //initialise loop control flag
- //for each fragment, scan other frags looking for contiguous blocks
- for (int i = 0; i < frag.Length; i++)
- {
- if (frag[i] == null)
- {
- continue;
- }
- //merge any contiguous blocks
- for (int x = 0; x < frag.Length; x++)
- {
- if (frag[x] == null)
- {
- continue;
- }
- if (frag[i] == null)
- {
- break;
- }
- TextFragment frag1 = null;
- TextFragment frag2 = null;
- int frag1Num = 0;
- int frag2Num = 0;
- int bestScoringFragNum;
- int worstScoringFragNum;
- //if blocks are contiguous....
- if (frag[i].Follows(frag[x]))
- {
- frag1 = frag[x];
- frag1Num = x;
- frag2 = frag[i];
- frag2Num = i;
- }
- else if (frag[x].Follows(frag[i]))
- {
- frag1 = frag[i];
- frag1Num = i;
- frag2 = frag[x];
- frag2Num = x;
- }
- //merging required..
- if (frag1 != null)
- {
- if (frag1.GetScore() > frag2.GetScore())
- {
- bestScoringFragNum = frag1Num;
- worstScoringFragNum = frag2Num;
- }
- else
- {
- bestScoringFragNum = frag2Num;
- worstScoringFragNum = frag1Num;
- }
- frag1.Merge(frag2);
- frag[worstScoringFragNum] = null;
- mergingStillBeingDone = true;
- frag[bestScoringFragNum] = frag1;
- }
- }
- }
- }
- while (mergingStillBeingDone);
- }
-
-
- /// <summary> Highlights terms in the text , extracting the most relevant sections
- /// and concatenating the chosen fragments with a separator (typically "...").
- /// The document text is analysed in chunks to record hit statistics
- /// across the document. After accumulating stats, the fragments with the highest scores
- /// are returned in order as "separator" delimited strings.
- ///
- /// </summary>
- /// <param name="text"> text to highlight terms in
- /// </param>
- /// <param name="maxNumFragments"> the maximum number of fragments.
- /// </param>
- /// <param name="separator"> the separator used to intersperse the document fragments (typically "...")
- ///
- /// </param>
- /// <returns> highlighted text
- /// </returns>
- public System.String GetBestFragments(TokenStream tokenStream, System.String text, int maxNumFragments, System.String separator)
- {
- System.String[] sections = GetBestFragments(tokenStream, text, maxNumFragments);
- System.Text.StringBuilder result = new System.Text.StringBuilder();
- for (int i = 0; i < sections.Length; i++)
- {
- if (i > 0)
- {
- result.Append(separator);
- }
- result.Append(sections[i]);
- }
- return result.ToString();
- }
-
- /// <returns> the maximum number of bytes to be tokenized per doc
- /// </returns>
- public virtual int GetMaxDocBytesToAnalyze()
- {
- return maxDocBytesToAnalyze;
- }
-
- /// <param name="byteCount">the maximum number of bytes to be tokenized per doc
- /// (This can improve performance with large documents)
- /// </param>
- public virtual void SetMaxDocBytesToAnalyze(int byteCount)
- {
- maxDocBytesToAnalyze = byteCount;
- }
-
-
- public virtual IFragmenter GetTextFragmenter()
- {
- return textFragmenter;
- }
-
- /// <param name="fragmenter"> </param>
- public virtual void SetTextFragmenter(IFragmenter fragmenter)
- {
- textFragmenter = fragmenter;
- }
-
- /// <returns> Object used to score each text fragment
- /// </returns>
- public virtual IScorer GetFragmentScorer()
- {
- return fragmentScorer;
- }
-
-
- /// <param name="scorer">
- /// </param>
- public virtual void SetFragmentScorer(IScorer scorer)
- {
- fragmentScorer = scorer;
- }
-
- public virtual IEncoder GetEncoder()
- {
- return encoder;
- }
- public virtual void SetEncoder(IEncoder encoder)
- {
- this.encoder = encoder;
- }
- }
-
- class FragmentQueue : PriorityQueue<TextFragment>
- {
- public FragmentQueue(int size)
- {
- Initialize(size);
- }
-
- public override bool LessThan(TextFragment a, TextFragment b)
- {
- if (a.GetScore() == b.GetScore())
- return a.fragNum > b.fragNum;
- else
- return a.GetScore() < b.GetScore();
- }
- }
+ /// a stream of tokens identified in the text parameter, including offset information.
+ /// This is typically produced by an analyzer re-parsing a document's
+ /// text. Some work may be done on retrieving TokenStreams more efficiently
+ /// by adding support for storing original text position data in the Lucene
+ /// index but this support is not currently available (as of Lucene 1.4 rc2).
+ /// </param>
+ /// <param name="text">text to highlight terms in</param>
+ /// <returns>highlighted text fragment or null if no terms found</returns>
+ /// <exception cref="InvalidTokenOffsetsException">thrown if any token's endOffset exceeds the provided text's length</exception>
+ public String GetBestFragment(TokenStream tokenStream, String text)
+ {
+ String[] results = GetBestFragments(tokenStream, text, 1);
+ if (results.Length > 0)
+ {
+ return results[0];
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// Highlights chosen terms in a text, extracting the most relevant sections.
+ /// This is a convenience method that calls <see cref="GetBestFragments(TokenStream, string, int)"/>
+ /// </summary>
+ /// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
+ /// <param name="fieldName">the name of the field being highlighted (used by analyzer)</param>
+ /// <param name="text">text to highlight terms in</param>
+ /// <param name="maxNumFragments">the maximum number of fragments.</param>
+ /// <returns>highlighted text fragments (between 0 and maxNumFragments number of fragments)</returns>
+ /// <exception cref="InvalidTokenOffsetsException">thrown if any token's endOffset exceeds the provided text's length</exception>
+ public String[] GetBestFragments(
+ Analyzer analyzer,
+ String fieldName,
+ String text,
+ int maxNumFragments)
+ {
+ TokenStream tokenStream = analyzer.TokenStream(fieldName, new StringReader(text));
+ return GetBestFragments(tokenStream, text, maxNumFragments);
+ }
+
+ /// <summary>
+ /// Highlights chosen terms in a text, extracting the most relevant sections.
+ /// The document text is analysed in chunks to record hit statistics
+ /// across the document. After accumulating stats, the fragments with the highest scores
+ /// are returned as an array of strings in order of score (contiguous fragments are merged into
+ /// one in their original order to improve readability)
+ /// </summary>
+ /// <param name="tokenStream"></param>
+ /// <param name="text">text to highlight terms in</param>
+ /// <param name="maxNumFragments">the maximum number of fragments.</param>
+ /// <returns>highlighted text fragments (between 0 and maxNumFragments number of fragments)</returns>
+ /// <exception cref="InvalidTokenOffsetsException">thrown if any token's endOffset exceeds the provided text's length</exception>
+ public String[] GetBestFragments(TokenStream tokenStream, String text, int maxNumFragments)
+ {
+ maxNumFragments = Math.Max(1, maxNumFragments); //sanity check
+
+ TextFragment[] frag = GetBestTextFragments(tokenStream, text, true, maxNumFragments);
+
+ //Get text
+ var fragTexts = new List<String>();
+ for (int i = 0; i < frag.Length; i++)
+ {
+ if ((frag[i] != null) && (frag[i].Score > 0))
+ {
+ fragTexts.Add(frag[i].ToString());
+ }
+ }
+ return fragTexts.ToArray();
+ }
+
+ /// <summary>
+ /// Low level api to get the most relevant (formatted) sections of the document.
+ /// This method has been made public to allow visibility of score information held in TextFragment objects.
+ /// Thanks to Jason Calabrese for help in redefining the interface.
+ /// </summary>
+ public TextFragment[] GetBestTextFragments(
+ TokenStream tokenStream,
+ String text,
+ bool mergeContiguousFragments,
+ int maxNumFragments)
+ {
+ var docFrags = new List<TextFragment>();
+ var newText = new StringBuilder();
+
+ var termAtt = tokenStream.AddAttribute<TermAttribute>();
+ var offsetAtt = tokenStream.AddAttribute<OffsetAttribute>();
+ tokenStream.AddAttribute<PositionIncrementAttribute>();
+ tokenStream.Reset();
+
+ var currentFrag = new TextFragment(newText, newText.Length, docFrags.Count);
+ var newStream = _fragmentScorer.Init(tokenStream);
+ if (newStream != null)
+ {
+ tokenStream = newStream;
+ }
+ _fragmentScorer.StartFragment(currentFrag);
+ docFrags.Add(currentFrag);
+
+ var fragQueue = new FragmentQueue(maxNumFragments);
+
+ try
+ {
+
+ String tokenText;
+ int startOffset;
+ int endOffset;
+ int lastEndOffset = 0;
+ _textFragmenter.Start(text, tokenStream);
+
+ var tokenGroup = new TokenGroup(tokenStream);
+
+ for (bool next = tokenStream.IncrementToken();
+ next && (offsetAtt.StartOffset < _maxDocCharsToAnalyze);
+ next = tokenStream.IncrementToken())
+ {
+ if ((offsetAtt.EndOffset > text.Length)
+ ||
+ (offsetAtt.StartOffset > text.Length)
+ )
+ {
+ throw new InvalidTokenOffsetsException("Token " + termAtt.Term()
+ + " exceeds length of provided text sized " + text.Length);
+ }
+ if ((tokenGroup.GetNumTokens() > 0) && (tokenGroup.IsDistinct()))
+ {
+ //the current token is distinct from previous tokens -
+ // markup the cached token group info
+ startOffset = tokenGroup.MatchStartOffset;
+ endOffset = tokenGroup.MatchEndOffset;
+ tokenText = text.Substring(startOffset, endOffset - startOffset);
+ String markedUpText = _formatter.HighlightTerm(_encoder.EncodeText(tokenText), tokenGroup);
+ //store any whitespace etc from between this and last group
+ if (startOffset > lastEndOffset)
+ newText.Append(_encoder.EncodeText(text.Substring(lastEndOffset, startOffset - lastEndOffset)));
+ newText.Append(markedUpText);
+ lastEndOffset = Math.Max(endOffset, lastEndOffset);
+ tokenGroup.Clear();
+
+ //check if current token marks the start of a new fragment
+ if (_textFragmenter.IsNewFragment())
+ {
+ currentFrag.Score = _fragmentScorer.GetFragmentScore();
+ //record stats for a new fragment
+ currentFrag.TextEndPos = newText.Length;
+ currentFrag = new TextFragment(newText, newText.Length, docFrags.Count);
+ _fragmentScorer.StartFragment(currentFrag);
+ docFrags.Add(currentFrag);
+ }
+ }
+
+ tokenGroup.AddToken(_fragmentScorer.GetTokenScore());
+
+ // if(lastEndOffset>maxDocBytesToAnalyze)
+ // {
+ // break;
+ // }
+ }
+ currentFrag.Score = _fragmentScorer.GetFragmentScore();
+
+ if (tokenGroup.NumTokens > 0)
+ {
+ //flush the accumulated text (same code as in above loop)
+ startOffset = tokenGroup.MatchStartOffset;
+ endOffset = tokenGroup.MatchEndOffset;
+ tokenText = text.Substring(startOffset, endOffset - startOffset);
+ var markedUpText = _formatter.HighlightTerm(_encoder.EncodeText(tokenText), tokenGroup);
+ //store any whitespace etc from between this and last group
+ if (startOffset > lastEndOffset)
+ newText.Append(_encoder.EncodeText(text.Substring(lastEndOffset, startOffset - lastEndOffset)));
+ newText.Append(markedUpText);
+ lastEndOffset = Math.Max(lastEndOffset, endOffset);
+ }
+
+ //Test what remains of the original text beyond the point where we stopped analyzing
+ if (
+ // if there is text beyond the last token considered..
+ (lastEndOffset < text.Length)
+ &&
+ // and that text is not too large...
+ (text.Length <= _maxDocCharsToAnalyze)
+ )
+ {
+ //append it to the last fragment
+ newText.Append(_encoder.EncodeText(text.Substring(lastEndOffset)));
+ }
+
+ currentFrag.TextEndPos = newText.Length;
+
+ //sort the most relevant sections of the text
+ foreach (var f in docFrags)
+ {
+ currentFrag = f;
+
+ //If you are running with a version of Lucene before 11th Sept 03
+ // you do not have PriorityQueue.insert() - so uncomment the code below
+ /*
+ if (currentFrag.getScore() >= minScore)
+ {
+ fragQueue.put(currentFrag);
+ if (fragQueue.size() > maxNumFragments)
+ { // if hit queue overfull
+ fragQueue.pop(); // remove lowest in hit queue
+ minScore = ((TextFragment) fragQueue.top()).getScore(); // reset minScore
+ }
+
+
+ }
+ */
+ //The above code caused a problem as a result of Christoph Goller's 11th Sept 03
+ //fix to PriorityQueue. The correct method to use here is the new "insert" method
+ // USE ABOVE CODE IF THIS DOES NOT COMPILE!
+ fragQueue.InsertWithOverflow(currentFrag);
+ }
+
+ //return the most relevant fragments
+ var frag = new TextFragment[fragQueue.Size()];
+ for (int i = frag.Length - 1; i >= 0; i--)
+ {
+ frag[i] = fragQueue.Pop();
+ }
+
+ //merge any contiguous fragments to improve readability
+ if (mergeContiguousFragments)
+ {
+ MergeContiguousFragments(frag);
+ frag = frag.Where(t => (t != null) && (t.Score > 0)).ToArray();
+ }
+
+ return frag;
+
+ }
+ finally
+ {
+ if (tokenStream != null)
+ {
+ try
+ {
+ tokenStream.Close();
+ }
+ catch (Exception)
+ {
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Improves readability of a score-sorted list of TextFragments by merging any fragments
+ /// that were contiguous in the original text into one larger fragment with the correct order.
+ /// This will leave a "null" in the array entry for the lesser scored fragment.
+ /// </summary>
+ /// <param name="frag">An array of document fragments in descending score</param>
+ private void MergeContiguousFragments(TextFragment[] frag)
+ {
+ bool mergingStillBeingDone;
+ if (frag.Length > 1)
+ do
+ {
+ mergingStillBeingDone = false; //initialise loop control flag
+ //for each fragment, scan other frags looking for contiguous blocks
+ for (int i = 0; i < frag.Length; i++)
+ {
+ if (frag[i] == null)
+ {
+ continue;
+ }
+ //merge any contiguous blocks
+ for (int x = 0; x < frag.Length; x++)
+ {
+ if (frag[x] == null)
+ {
+ continue;
+ }
+ if (frag[i] == null)
+ {
+ break;
+ }
+ TextFragment frag1 = null;
+ TextFragment frag2 = null;
+ int frag1Num = 0;
+ int frag2Num = 0;
+ int bestScoringFragNum;
+ int worstScoringFragNum;
+ //if blocks are contiguous....
+ if (frag[i].Follows(frag[x]))
+ {
+ frag1 = frag[x];
+ frag1Num = x;
+ frag2 = frag[i];
+ frag2Num = i;
+ }
+ else if (frag[x].Follows(frag[i]))
+ {
+ frag1 = frag[i];
+ frag1Num = i;
+ frag2 = frag[x];
+ frag2Num = x;
+ }
+ //merging required..
+ if (frag1 != null)
+ {
+ if (frag1.Score > frag2.Score)
+ {
+ bestScoringFragNum = frag1Num;
+ worstScoringFragNum = frag2Num;
+ }
+ else
+ {
+ bestScoringFragNum = frag2Num;
+ worstScoringFragNum = frag1Num;
+ }
+ frag1.Merge(frag2);
+ frag[worstScoringFragNum] = null;
+ mergingStillBeingDone = true;
+ frag[bestScoringFragNum] = frag1;
+ }
+ }
+ }
+ } while (mergingStillBeingDone);
+ }
+
+ /// <summary>
+        /// Highlights terms in the text, extracting the most relevant sections
+ /// and concatenating the chosen fragments with a separator (typically "...").
+ /// The document text is analysed in chunks to record hit statistics
+ /// across the document. After accumulating stats, the fragments with the highest scores
+ /// are returned in order as "separator" delimited strings.
+ /// </summary>
+ /// <param name="tokenStream"></param>
+ /// <param name="text">text to highlight terms in</param>
+ /// <param name="maxNumFragments">the maximum number of fragments.</param>
+ /// <param name="separator">the separator used to intersperse the document fragments (typically "...")</param>
+ /// <returns>highlighted text</returns>
+ public String GetBestFragments(
+ TokenStream tokenStream,
+ String text,
+ int maxNumFragments,
+ String separator)
+ {
+ string[] sections = GetBestFragments(tokenStream, text, maxNumFragments);
+ StringBuilder result = new StringBuilder();
+ for (int i = 0; i < sections.Length; i++)
+ {
+ if (i > 0)
+ {
+ result.Append(separator);
+ }
+ result.Append(sections[i]);
+ }
+ return result.ToString();
+ }
+
+ public int MaxDocCharsToAnalyze
+ {
+ get { return _maxDocCharsToAnalyze; }
+ set { this._maxDocCharsToAnalyze = value; }
+ }
+
+
+ public IFragmenter TextFragmenter
+ {
+ get { return _textFragmenter; }
+ set { _textFragmenter = value; }
+ }
+
+ public IScorer FragmentScorer
+ {
+ get { return _fragmentScorer; }
+ set { _fragmentScorer = value; }
+ }
+
+ public IEncoder Encoder
+ {
+ get { return _encoder; }
+ set { this._encoder = value; }
+ }
+ }
+
+ internal class FragmentQueue : PriorityQueue<TextFragment>
+ {
+ public FragmentQueue(int size)
+ {
+ Initialize(size);
+ }
+
+ public override bool LessThan(TextFragment fragA, TextFragment fragB)
+ {
+ if (fragA.Score == fragB.Score)
+ return fragA.FragNum > fragB.FragNum;
+ else
+ return fragA.Score < fragB.Score;
+ }
+ }
}
Added: incubator/lucene.net/trunk/src/contrib/Highlighter/IFormatter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/IFormatter.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/IFormatter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/IFormatter.cs Fri Apr 6 23:37:48 2012
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Search.Highlight
+{
+ /// <summary> Processes terms found in the original text, typically by applying some form
+ /// of mark-up to highlight terms in HTML search results pages.</summary>
+ public interface IFormatter
+ {
+ /// <param name="originalText">The section of text being considered for markup</param>
+ /// <param name="tokenGroup">contains one or several overlapping Tokens along with
+ /// their scores and positions.</param>
+ string HighlightTerm(System.String originalText, TokenGroup tokenGroup);
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Highlighter/IFragmenter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/IFragmenter.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/IFragmenter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/IFragmenter.cs Fri Apr 6 23:37:48 2012
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Search.Highlight
+{
+
+ /// <summary> Implements the policy for breaking text into multiple fragments for consideration
+ /// by the <see cref="Highlighter"/> class. A sophisticated implementation may do this on the basis
+ /// of detecting end of sentences in the text.
+ /// </summary>
+ /// <author> mark@searcharea.co.uk
+ /// </author>
+ public interface IFragmenter
+ {
+ /// <summary>
+ /// Initializes the Fragmenter. You can grab references to the Attributes you are
+        /// interested in from tokenStream and then access the values in <see cref="IsNewFragment()"/>.
+ /// </summary>
+ /// <param name="originalText">the original source text</param>
+        ///<param name="tokenStream">the <see cref="TokenStream" /> to be fragmented</param>
+ void Start(string originalText, TokenStream tokenStream);
+
+ /// <summary>
+ /// Test to see if this token from the stream should be held in a new
+ /// TextFragment. Every time this is called, the TokenStream
+        /// passed to <see cref="Start(string, TokenStream)"/> will have been incremented.
+ /// </summary>
+ bool IsNewFragment();
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Highlighter/IScorer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/IScorer.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/IScorer.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/IScorer.cs Fri Apr 6 23:37:48 2012
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Search.Highlight
+{
+ /// <summary> Adds to the score for a fragment based on its tokens</summary>
+ public interface IScorer
+ {
+ /// <summary>
+        /// Called to init the Scorer with a <see cref="TokenStream"/>. You can grab references to
+        /// the attributes you are interested in here and access them from <see cref="GetTokenScore()"/>.
+ /// </summary>
+        /// <param name="tokenStream">the <see cref="TokenStream"/> that will be scored.</param>
+ /// <returns>
+        /// either a <see cref="TokenStream"/> that the Highlighter should continue using (eg
+        /// if you read the tokenStream in this method) or null to continue
+        /// using the same <see cref="TokenStream"/> that was passed in.
+ /// </returns>
+ /// <exception cref="IOException"></exception>
+ ///
+ TokenStream Init(TokenStream tokenStream);
+
+ /// <summary>
+ /// Called when a new fragment is started for consideration.
+ /// </summary>
+ /// <param name="newFragment">the fragment that will be scored next</param>
+ void StartFragment(TextFragment newFragment);
+
+ /// <summary>
+        /// Called for each token in the current fragment. The <see cref="Highlighter"/> will
+        /// increment the <see cref="TokenStream"/> passed to <see cref="Init"/> on every call.
+ /// </summary>
+        /// <returns>a score which is passed to the <see cref="Highlighter"/> class to influence the
+ /// mark-up of the text (this return value is NOT used to score the
+ /// fragment)</returns>
+ float GetTokenScore();
+
+ ///<summary>
+        /// Called when the <see cref="Highlighter"/> has no more tokens for the current fragment -
+        /// the Scorer returns the weighting it has derived for the most recent
+        /// fragment, typically based on the results of <see cref="GetTokenScore()"/>.
+ /// </summary>
+ float GetFragmentScore();
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/NullFragmenter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/NullFragmenter.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/NullFragmenter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/NullFragmenter.cs Fri Apr 6 23:37:48 2012
@@ -15,12 +15,9 @@
* limitations under the License.
*/
-using System;
using Lucene.Net.Analysis;
-using Lucene.Net.Search.Highlight;
-using Token = Lucene.Net.Analysis.Token;
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
/// <summary> <see cref="IFragmenter"/> implementation which does not fragment the text.
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/QueryScorer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/QueryScorer.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/QueryScorer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/QueryScorer.cs Fri Apr 6 23:37:48 2012
@@ -19,7 +19,6 @@ using System;
using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Highlight;
using Lucene.Net.Index;
using Lucene.Net.Search.Spans;
using Lucene.Net.Support;
@@ -51,114 +50,102 @@ namespace Lucene.Net.Search.Highlight
private bool skipInitExtractor;
private bool wrapToCaching = true;
- /**
- * @param query Query to use for highlighting
- */
-
+ /// <summary>
+ /// Constructs a new QueryScorer instance
+ /// </summary>
+ /// <param name="query">Query to use for highlighting</param>
public QueryScorer(Query query)
{
- init(query, null, null, true);
+ Init(query, null, null, true);
}
- /**
- * @param query Query to use for highlighting
- * @param field Field to highlight - pass null to ignore fields
- */
-
+ /// <summary>
+ /// Constructs a new QueryScorer instance
+ /// </summary>
+ /// <param name="query">Query to use for highlighting</param>
+ /// <param name="field">Field to highlight - pass null to ignore fields</param>
public QueryScorer(Query query, String field)
{
- init(query, field, null, true);
+ Init(query, field, null, true);
}
- /**
- * @param query Query to use for highlighting
- * @param field Field to highlight - pass null to ignore fields
- * @param reader {@link IndexReader} to use for quasi tf/idf scoring
- */
-
+ /// <summary>
+ /// Constructs a new QueryScorer instance
+ /// </summary>
+ /// <param name="query">Query to use for highlighting</param>
+ /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
+ /// <param name="field">Field to highlight - pass null to ignore fields</param>
public QueryScorer(Query query, IndexReader reader, String field)
{
- init(query, field, reader, true);
+ Init(query, field, reader, true);
}
-
- /**
- * @param query to use for highlighting
- * @param reader {@link IndexReader} to use for quasi tf/idf scoring
- * @param field to highlight - pass null to ignore fields
- * @param defaultField
- */
-
+ /// <summary>
+ /// Constructs a new QueryScorer instance
+ /// </summary>
+ /// <param name="query">Query to use for highlighting</param>
+ /// <param name="reader"><see cref="IndexReader"/> to use for quasi tf/idf scoring</param>
+ /// <param name="field">Field to highlight - pass null to ignore fields</param>
+ /// <param name="defaultField">The default field for queries with the field name unspecified</param>
public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
{
this.defaultField = StringHelper.Intern(defaultField);
- init(query, field, reader, true);
+ Init(query, field, reader, true);
}
- /**
- * @param defaultField - The default field for queries with the field name unspecified
- */
+ /// <summary>
+ /// Constructs a new QueryScorer instance
+ /// </summary>
+ /// <param name="query">Query to use for highlighting</param>
+ /// <param name="field">Field to highlight - pass null to ignore fields</param>
+ /// <param name="defaultField">The default field for queries with the field name unspecified</param>
public QueryScorer(Query query, String field, String defaultField)
{
this.defaultField = StringHelper.Intern(defaultField);
- init(query, field, null, true);
+ Init(query, field, null, true);
}
- /**
- * @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s
- */
-
+ /// <summary>
+ /// Constructs a new QueryScorer instance
+ /// </summary>
+ /// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
public QueryScorer(WeightedSpanTerm[] weightedTerms)
{
this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);
- for (int i = 0; i < weightedTerms.Length; i++)
+ foreach (WeightedSpanTerm t in weightedTerms)
{
- WeightedSpanTerm existingTerm = fieldWeightedSpanTerms[weightedTerms[i].term];
+ WeightedSpanTerm existingTerm = fieldWeightedSpanTerms[t.Term];
if ((existingTerm == null) ||
- (existingTerm.weight < weightedTerms[i].weight))
+ (existingTerm.Weight < t.Weight))
{
// if a term is defined more than once, always use the highest
- // scoring weight
- fieldWeightedSpanTerms[weightedTerms[i].term] = weightedTerms[i];
- maxTermWeight = Math.Max(maxTermWeight, weightedTerms[i].GetWeight());
+ // scoring Weight
+ fieldWeightedSpanTerms[t.Term] = t;
+ maxTermWeight = Math.Max(maxTermWeight, t.Weight);
}
}
skipInitExtractor = true;
}
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
- */
-
- public float getFragmentScore()
+ /// <seealso cref="IScorer.GetFragmentScore()"/>
+ public float GetFragmentScore()
{
return totalScore;
}
- /**
- *
- * @return The highest weighted term (useful for passing to
- * GradientFormatter to set top end of coloring scale).
- */
-
- public float getMaxTermWeight()
+ /// <summary>
+ /// The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale).
+ /// </summary>
+ public float GetMaxTermWeight()
{
return maxTermWeight;
}
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
- * int)
- */
-
- public float getTokenScore()
+ /// <seealso cref="IScorer.GetTokenScore"/>
+ public float GetTokenScore()
{
position += posIncAtt.PositionIncrement;
String termText = termAtt.Term();
@@ -170,13 +157,13 @@ namespace Lucene.Net.Search.Highlight
return 0;
}
- if (weightedSpanTerm.isPositionSensitive() &&
- !weightedSpanTerm.checkPosition(position))
+ if (weightedSpanTerm.IsPositionSensitive() &&
+ !weightedSpanTerm.CheckPosition(position))
{
return 0;
}
- float score = weightedSpanTerm.GetWeight();
+ float score = weightedSpanTerm.Weight;
// found a query term - is it unique in this doc?
if (!foundTerms.Contains(termText))
@@ -188,11 +175,8 @@ namespace Lucene.Net.Search.Highlight
return score;
}
- /* (non-Javadoc)
- * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
- */
-
- public TokenStream init(TokenStream tokenStream)
+ /// <seealso cref="IScorer.Init"/>
+ public TokenStream Init(TokenStream tokenStream)
{
position = -1;
termAtt = tokenStream.AddAttribute<TermAttribute>();
@@ -203,28 +187,23 @@ namespace Lucene.Net.Search.Highlight
{
fieldWeightedSpanTerms.Clear();
}
- return initExtractor(tokenStream);
+ return InitExtractor(tokenStream);
}
return null;
}
- /**
- * Retrieve the {@link WeightedSpanTerm} for the specified token. Useful for passing
- * Span information to a {@link Fragmenter}.
- *
- * @param token to get {@link WeightedSpanTerm} for
- * @return WeightedSpanTerm for token
- */
-
- public WeightedSpanTerm getWeightedSpanTerm(String token)
+ /// <summary>
+ /// Retrieve the <see cref="WeightedSpanTerm"/> for the specified token. Useful for passing
+ /// Span information to a <see cref="IFragmenter"/>.
+ /// </summary>
+        /// <param name="token">token to get <see cref="WeightedSpanTerm"/> for</param>
+ /// <returns>WeightedSpanTerm for token</returns>
+ public WeightedSpanTerm GetWeightedSpanTerm(String token)
{
return fieldWeightedSpanTerms[token];
}
-
- /**
- */
-
- private void init(Query query, String field, IndexReader reader, bool expandMultiTermQuery)
+
+ private void Init(Query query, String field, IndexReader reader, bool expandMultiTermQuery)
{
this.reader = reader;
this.expandMultiTermQuery = expandMultiTermQuery;
@@ -232,76 +211,57 @@ namespace Lucene.Net.Search.Highlight
this.field = field;
}
- private TokenStream initExtractor(TokenStream tokenStream)
+ private TokenStream InitExtractor(TokenStream tokenStream)
{
WeightedSpanTermExtractor qse = defaultField == null
? new WeightedSpanTermExtractor()
: new WeightedSpanTermExtractor(defaultField);
- qse.setExpandMultiTermQuery(expandMultiTermQuery);
- qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
+ qse.SetExpandMultiTermQuery(expandMultiTermQuery);
+ qse.SetWrapIfNotCachingTokenFilter(wrapToCaching);
if (reader == null)
{
- this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
+ this.fieldWeightedSpanTerms = qse.GetWeightedSpanTerms(query,
tokenStream, field);
}
else
{
- this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
+ this.fieldWeightedSpanTerms = qse.GetWeightedSpanTermsWithScores(query,
tokenStream, field, reader);
}
- if (qse.isCachedTokenStream())
+ if (qse.IsCachedTokenStream())
{
- return qse.getTokenStream();
+ return qse.GetTokenStream();
}
return null;
}
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
- */
-
- public void startFragment(TextFragment newFragment)
+ /// <seealso cref="IScorer.StartFragment"/>
+ public void StartFragment(TextFragment newFragment)
{
foundTerms = new HashSet<String>();
totalScore = 0;
}
- /**
- * @return true if multi-term queries should be expanded
- */
-
- public bool isExpandMultiTermQuery()
- {
- return expandMultiTermQuery;
- }
-
- /**
- * Controls whether or not multi-term queries are expanded
- * against a {@link MemoryIndex} {@link IndexReader}.
- *
- * @param expandMultiTermQuery true if multi-term queries should be expanded
- */
-
- public void setExpandMultiTermQuery(bool expandMultiTermQuery)
- {
- this.expandMultiTermQuery = expandMultiTermQuery;
- }
-
- /**
- * By default, {@link TokenStream}s that are not of the type
- * {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
- * ensure an efficient reset - if you are already using a different caching
- * {@link TokenStream} impl and you don't want it to be wrapped, set this to
- * false.
- *
- * @param wrap
- */
-
- public void setWrapIfNotCachingTokenFilter(bool wrap)
+ /// <summary>
+ /// Controls whether or not multi-term queries are expanded
+ /// against a <see cref="MemoryIndex"/> <see cref="IndexReader"/>.
+ /// </summary>
+ public bool IsExpandMultiTermQuery
+ {
+ get { return expandMultiTermQuery; }
+ set { this.expandMultiTermQuery = value; }
+ }
+
+ /// <summary>
+ /// By default, <see cref="TokenStream"/>s that are not of the type
+ /// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/> to
+ /// ensure an efficient reset - if you are already using a different caching
+ /// <see cref="TokenStream"/> impl and you don't want it to be wrapped, set this to
+ /// false.
+ /// </summary>
+ public void SetWrapIfNotCachingTokenFilter(bool wrap)
{
this.wrapToCaching = wrap;
}
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermExtractor.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermExtractor.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermExtractor.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermExtractor.cs Fri Apr 6 23:37:48 2012
@@ -19,7 +19,6 @@ using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
-using Lucene.Net.Highlight;
using Lucene.Net.Index;
using Lucene.Net.Util;
@@ -61,7 +60,7 @@ namespace Lucene.Net.Search.Highlight
{
try
{
- int docFreq = reader.DocFreq(new Term(fieldName, t.term));
+ int docFreq = reader.DocFreq(new Term(fieldName, t.Term));
// docFreq counts deletes
if (totalNumDocs < docFreq)
{
@@ -69,7 +68,7 @@ namespace Lucene.Net.Search.Highlight
}
//IDF algorithm taken from DefaultSimilarity class
var idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
- t.weight *= idf;
+ t.Weight *= idf;
}
catch (IOException e)
{
@@ -152,8 +151,8 @@ namespace Lucene.Net.Search.Highlight
BooleanClause[] queryClauses = query.GetClauses();
for (int i = 0; i < queryClauses.Length; i++)
{
- if (prohibited || queryClauses[i].GetOccur() != BooleanClause.Occur.MUST_NOT)
- GetTerms(queryClauses[i].GetQuery(), terms, prohibited, fieldName);
+ if (prohibited || queryClauses[i].Occur != Occur.MUST_NOT)
+ GetTerms(queryClauses[i].Query, terms, prohibited, fieldName);
}
}
private static void GetTermsFromFilteredQuery(FilteredQuery query, HashSet<WeightedTerm> terms, bool prohibited, string fieldName)
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermScorer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermScorer.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermScorer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/QueryTermScorer.cs Fri Apr 6 23:37:48 2012
@@ -4,7 +4,6 @@ using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Highlight;
using Lucene.Net.Index;
using Lucene.Net.Support;
@@ -72,14 +71,14 @@ namespace Lucene.Net.Search.Highlight
termsToFind = new HashMap<String, WeightedTerm>();
for (int i = 0; i < weightedTerms.Length; i++)
{
- WeightedTerm existingTerm = termsToFind[weightedTerms[i].term];
+ WeightedTerm existingTerm = termsToFind[weightedTerms[i].Term];
if ((existingTerm == null)
- || (existingTerm.weight < weightedTerms[i].weight))
+ || (existingTerm.Weight < weightedTerms[i].Weight))
{
// if a term is defined more than once, always use the highest scoring
- // weight
- termsToFind[weightedTerms[i].term] = weightedTerms[i];
- maxTermWeight = Math.Max(maxTermWeight, weightedTerms[i].GetWeight());
+ // Weight
+ termsToFind[weightedTerms[i].Term] = weightedTerms[i];
+ maxTermWeight = Math.Max(maxTermWeight, weightedTerms[i].Weight);
}
}
}
@@ -88,7 +87,7 @@ namespace Lucene.Net.Search.Highlight
* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
*/
- public TokenStream init(TokenStream tokenStream)
+ public TokenStream Init(TokenStream tokenStream)
{
termAtt = tokenStream.AddAttribute<TermAttribute>();
return null;
@@ -102,7 +101,7 @@ namespace Lucene.Net.Search.Highlight
* .lucene.search.highlight.TextFragment)
*/
- public void startFragment(TextFragment newFragment)
+ public void StartFragment(TextFragment newFragment)
{
uniqueTermsInFragment = new HashSet<String>();
currentTextFragment = newFragment;
@@ -115,7 +114,7 @@ namespace Lucene.Net.Search.Highlight
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
*/
- public float getTokenScore()
+ public float GetTokenScore()
{
String termText = termAtt.Term();
@@ -128,10 +127,10 @@ namespace Lucene.Net.Search.Highlight
// found a query term - is it unique in this doc?
if (!uniqueTermsInFragment.Contains(termText))
{
- totalScore += queryTerm.GetWeight();
+ totalScore += queryTerm.Weight;
uniqueTermsInFragment.Add(termText);
}
- return queryTerm.GetWeight();
+ return queryTerm.Weight;
}
@@ -139,7 +138,7 @@ namespace Lucene.Net.Search.Highlight
* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
*/
- public float getFragmentScore()
+ public float GetFragmentScore()
{
return totalScore;
}
@@ -151,7 +150,7 @@ namespace Lucene.Net.Search.Highlight
* org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
*/
- public void allFragmentsProcessed()
+ public void AllFragmentsProcessed()
{
// this class has no special operations to perform at end of processing
}
@@ -162,7 +161,7 @@ namespace Lucene.Net.Search.Highlight
* to set top end of coloring scale.
*/
- public float getMaxTermWeight()
+ public float GetMaxTermWeight()
{
return maxTermWeight;
}
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleFragmenter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleFragmenter.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleFragmenter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleFragmenter.cs Fri Apr 6 23:37:48 2012
@@ -18,10 +18,8 @@
using System;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Search.Highlight;
-using Token = Lucene.Net.Analysis.Token;
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
/// <summary> <see cref="IFragmenter"/> implementation which breaks text up into same-size
@@ -81,7 +79,7 @@ namespace Lucene.Net.Highlight
* @return size in number of characters of each fragment
*/
- public int getFragmentSize()
+ public int GetFragmentSize()
{
return fragmentSize;
}
@@ -90,7 +88,7 @@ namespace Lucene.Net.Highlight
* @param size size in characters of each fragment
*/
- public void setFragmentSize(int size)
+ public void SetFragmentSize(int size)
{
fragmentSize = size;
}
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLEncoder.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLEncoder.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLEncoder.cs Fri Apr 6 23:37:48 2012
@@ -17,9 +17,8 @@
using System;
using System.Text;
-using Lucene.Net.Search.Highlight;
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
/// <summary> Simple <see cref="IEncoder"/> implementation to escape text for HTML output</summary>
public class SimpleHTMLEncoder : IEncoder
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLFormatter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLFormatter.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLFormatter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleHTMLFormatter.cs Fri Apr 6 23:37:48 2012
@@ -15,10 +15,7 @@
* limitations under the License.
*/
-using System;
-using Lucene.Net.Search.Highlight;
-
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
/// <summary> Simple <see cref="IFormatter"/> implementation to highlight terms with a pre and post tag</summary>
/// <author> MAHarwood
Added: incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleSpanFragmenter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleSpanFragmenter.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleSpanFragmenter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/SimpleSpanFragmenter.cs Fri Apr 6 23:37:48 2012
@@ -0,0 +1,91 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Search.Highlight
+{
+ public class SimpleSpanFragmenter : IFragmenter
+ {
+ private static int DEFAULT_FRAGMENT_SIZE = 100;
+ private int fragmentSize;
+ private int currentNumFrags;
+ private int position = -1;
+ private QueryScorer queryScorer;
+ private int waitForPos = -1;
+ private int textSize;
+ private TermAttribute termAtt;
+ private PositionIncrementAttribute posIncAtt;
+ private OffsetAttribute offsetAtt;
+
+ /// <param name="queryScorer">QueryScorer that was used to score hits</param>
+ public SimpleSpanFragmenter(QueryScorer queryScorer)
+ : this(queryScorer, DEFAULT_FRAGMENT_SIZE)
+ {
+
+ }
+
+ /// <param name="queryScorer">QueryScorer that was used to score hits</param>
+ /// <param name="fragmentSize">size in bytes of each fragment</param>
+ public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize)
+ {
+ this.fragmentSize = fragmentSize;
+ this.queryScorer = queryScorer;
+ }
+
+ /// <seealso cref="IFragmenter.IsNewFragment"/>
+ public bool IsNewFragment()
+ {
+ position += posIncAtt.PositionIncrement;
+
+ if (waitForPos == position)
+ {
+ waitForPos = -1;
+ }
+ else if (waitForPos != -1)
+ {
+ return false;
+ }
+
+ WeightedSpanTerm wSpanTerm = queryScorer.GetWeightedSpanTerm(termAtt.Term());
+
+ if (wSpanTerm != null)
+ {
+ List<PositionSpan> positionSpans = wSpanTerm.GetPositionSpans();
+
+ for (int i = 0; i < positionSpans.Count; i++)
+ {
+ if (positionSpans[i].Start == position)
+ {
+ waitForPos = positionSpans[i].End + 1;
+ break;
+ }
+ }
+ }
+
+ bool isNewFrag = offsetAtt.EndOffset >= (fragmentSize*currentNumFrags)
+ && (textSize - offsetAtt.EndOffset) >= ((uint) fragmentSize >> 1);
+
+
+ if (isNewFrag)
+ {
+ currentNumFrags++;
+ }
+
+ return isNewFrag;
+ }
+
+ /// <seealso cref="IFragmenter.Start(string, TokenStream)"/>
+ public void Start(String originalText, TokenStream tokenStream)
+ {
+ position = -1;
+ currentNumFrags = 1;
+ textSize = originalText.Length;
+ termAtt = tokenStream.AddAttribute<TermAttribute>();
+ posIncAtt = tokenStream.AddAttribute<PositionIncrementAttribute>();
+ offsetAtt = tokenStream.AddAttribute<OffsetAttribute>();
+ }
+ }
+}
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/SpanGradientFormatter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/SpanGradientFormatter.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/SpanGradientFormatter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/SpanGradientFormatter.cs Fri Apr 6 23:37:48 2012
@@ -16,61 +16,58 @@
*/
using System;
+using System.Text;
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
- /// <summary> Formats text with different color intensity depending on the score of the
- /// term using the span tag. GradientFormatter uses a bgcolor argument to the font tag which
- /// doesn't work in Mozilla, thus this class.
- ///
- /// </summary>
- /// <seealso cref="GradientFormatter">
- /// </seealso>
- /// <author> David Spencer dave@searchmorph.com
- /// </author>
-
- public class SpanGradientFormatter:GradientFormatter
- {
- public SpanGradientFormatter(float maxScore, System.String minForegroundColor, System.String maxForegroundColor, System.String minBackgroundColor, System.String maxBackgroundColor):base(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor)
- {
- }
-
-
-
- public override System.String HighlightTerm(System.String originalText, TokenGroup tokenGroup)
- {
- if (tokenGroup.GetTotalScore() == 0)
- return originalText;
- float score = tokenGroup.GetTotalScore();
- if (score == 0)
- {
- return originalText;
- }
-
- // try to size sb correctly
- System.Text.StringBuilder sb = new System.Text.StringBuilder(originalText.Length + EXTRA);
-
- sb.Append("<span style=\"");
- if (highlightForeground)
- {
- sb.Append("color: ");
- sb.Append(GetForegroundColorString(score));
- sb.Append("; ");
- }
- if (highlightBackground)
- {
- sb.Append("background: ");
- sb.Append(GetBackgroundColorString(score));
- sb.Append("; ");
- }
- sb.Append("\">");
- sb.Append(originalText);
- sb.Append("</span>");
- return sb.ToString();
- }
-
- // guess how much extra text we'll add to the text we're highlighting to try to avoid a StringBuffer resize
- private const System.String TEMPLATE = "<span style=\"background: #EEEEEE; color: #000000;\">...</span>";
- private static readonly int EXTRA = TEMPLATE.Length;
- }
+ /// <summary>
+ /// Formats text with different color intensity depending on the score of the
+ /// term using the span tag. GradientFormatter uses a bgcolor argument to the font tag which
+ /// doesn't work in Mozilla, thus this class.
+ /// </summary>
+ /// <seealso cref="GradientFormatter"/>
+ public class SpanGradientFormatter : GradientFormatter
+ {
+ // guess how much extra text we'll add to the text we're highlighting to try to avoid a StringBuilder resize
+ private static readonly String TEMPLATE = "<span style=\"background: #EEEEEE; color: #000000;\">...</span>";
+ private static readonly int EXTRA = TEMPLATE.Length;
+
+ public SpanGradientFormatter(float maxScore, String minForegroundColor,
+ String maxForegroundColor, String minBackgroundColor,
+ String maxBackgroundColor)
+ : base(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor)
+ { }
+
+ public override String HighlightTerm(String originalText, TokenGroup tokenGroup)
+ {
+ if (tokenGroup.GetTotalScore() == 0)
+ return originalText;
+ float score = tokenGroup.GetTotalScore();
+ if (score == 0)
+ {
+ return originalText;
+ }
+
+ // try to size sb correctly
+ var sb = new StringBuilder(originalText.Length + EXTRA);
+
+ sb.Append("<span style=\"");
+ if (highlightForeground)
+ {
+ sb.Append("color: ");
+ sb.Append(GetForegroundColorString(score));
+ sb.Append("; ");
+ }
+ if (highlightBackground)
+ {
+ sb.Append("background: ");
+ sb.Append(GetBackgroundColorString(score));
+ sb.Append("; ");
+ }
+ sb.Append("\">");
+ sb.Append(originalText);
+ sb.Append("</span>");
+ return sb.ToString();
+ }
+ }
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/TextFragment.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/TextFragment.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/TextFragment.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/TextFragment.cs Fri Apr 6 23:37:48 2012
@@ -16,66 +16,58 @@
*/
using System;
+using System.Text;
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
{
- /// <summary> Low-level class used to record information about a section of a document
- /// with a score.
- /// </summary>
- /// <author> MAHarwood
- ///
- ///
- /// </author>
- public class TextFragment
- {
- internal System.Text.StringBuilder markedUpText;
- internal int fragNum;
- internal int textStartPos;
- internal int textEndPos;
- internal float score;
-
- public TextFragment(System.Text.StringBuilder markedUpText, int textStartPos, int fragNum)
- {
- this.markedUpText = markedUpText;
- this.textStartPos = textStartPos;
- this.fragNum = fragNum;
- }
- internal virtual void SetScore(float score)
- {
- this.score = score;
- }
- public virtual float GetScore()
- {
- return score;
- }
- /// <param name="frag2">Fragment to be merged into this one
- /// </param>
- public virtual void Merge(TextFragment frag2)
- {
- textEndPos = frag2.textEndPos;
- score = System.Math.Max(score, frag2.score);
- }
- /// <param name="fragment">
- /// </param>
- /// <returns> true if this fragment follows the one passed
- /// </returns>
- public virtual bool Follows(TextFragment fragment)
- {
- return textStartPos == fragment.textEndPos;
- }
-
- /// <returns> the fragment sequence number
- /// </returns>
- public virtual int GetFragNum()
- {
- return fragNum;
- }
-
- /* Returns the marked-up text for this text fragment
- */
- public override System.String ToString()
- {
- return markedUpText.ToString(textStartPos, textEndPos - textStartPos);
- }
- }
/// <summary> Low-level class used to record information about a section of a document
/// with a score.
/// </summary>
public class TextFragment
{
    // Full marked-up text of the source document; this fragment is the
    // [TextStartPos, TextEndPos) slice of it. Assigned only in the
    // constructor, hence readonly.
    private readonly StringBuilder markedUpText;

    public TextFragment(StringBuilder markedUpText, int textStartPos, int fragNum)
    {
        this.markedUpText = markedUpText;
        this.TextStartPos = textStartPos;
        this.FragNum = fragNum;
    }

    /// <summary>Score for this fragment, as assigned by the highlighter.</summary>
    public float Score { get; protected internal set; }

    /// <summary>End offset (exclusive) of this fragment within the marked-up text.</summary>
    public int TextEndPos { get; protected internal set; }

    /// <summary>Start offset (inclusive) of this fragment within the marked-up text.</summary>
    public int TextStartPos { get; protected internal set; }

    /// <summary>The fragment sequence number.</summary>
    public int FragNum { get; protected internal set; }

    /// <summary>
    /// Merges the given fragment into this one: extends this fragment's end
    /// position and keeps the higher of the two scores.
    /// </summary>
    /// <param name="frag2">Fragment to be merged into this one</param>
    public void Merge(TextFragment frag2)
    {
        TextEndPos = frag2.TextEndPos;
        Score = Math.Max(Score, frag2.Score);
    }

    /// <summary>
    /// True if this fragment starts exactly where the given fragment ends.
    /// </summary>
    public bool Follows(TextFragment fragment)
    {
        return TextStartPos == fragment.TextEndPos;
    }

    /// <summary>
    /// Returns the marked-up text for this text fragment.
    /// </summary>
    public override String ToString()
    {
        return markedUpText.ToString(TextStartPos, TextEndPos - TextStartPos);
    }
}
}
\ No newline at end of file