You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2006/12/27 16:05:37 UTC

svn commit: r490508 [2/3] - in /incubator/lucene.net/trunk/C#/contrib/Highlighter.Net: ./ Highlighter.Net/ Test/

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/QueryScorer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/QueryScorer.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/QueryScorer.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/QueryScorer.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Token = Lucene.Net.Analysis.Token;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Query = Lucene.Net.Search.Query;
+
+namespace Lucene.Net.Highlight
+{
+	
+	/// <summary> {@link Scorer} implementation which scores text fragments by the number of unique query terms found.
+	/// This class uses the {@link QueryTermExtractor} class to determine the query terms and 
+	/// their boosts to be used. 
+	/// </summary>
+	/// <author>  mark@searcharea.co.uk
+	/// </author>
+	//TODO: provide option to boost score of fragments near beginning of document 
+	// based on fragment.getFragNum()
+	public class QueryScorer : Scorer
+	{
+		internal TextFragment currentTextFragment = null;
+		// Term texts already scored in the current fragment - each unique term adds to totalScore only once.
+		internal System.Collections.Hashtable uniqueTermsInFragment;
+		// Running score for the fragment currently being processed (reset by StartFragment).
+		internal float totalScore = 0;
+		// Highest single-term weight seen across all query terms; exposed via GetMaxTermWeight.
+		internal float maxTermWeight = 0;
+		// Query terms to look for, keyed by term text; values are WeightedTerm instances.
+		private System.Collections.Hashtable termsToFind;
+		
+		
+		/// <summary> </summary>
+		/// <param name="query">a Lucene query (ideally rewritten using query.rewrite 
+		/// before being passed to this class and the searcher)
+		/// </param>
+		public QueryScorer(Query query):this(QueryTermExtractor.GetTerms(query))
+		{
+		}
+		
+		/// <summary> </summary>
+		/// <param name="query">a Lucene query (ideally rewritten using query.rewrite 
+		/// before being passed to this class and the searcher)
+		/// </param>
+		/// <param name="fieldName">the Field name which is used to match Query terms
+		/// </param>
+		public QueryScorer(Query query, System.String fieldName):this(QueryTermExtractor.GetTerms(query, false, fieldName))
+		{
+		}
+		
+		/// <summary> </summary>
+		/// <param name="query">a Lucene query (ideally rewritten using query.rewrite 
+		/// before being passed to this class and the searcher)
+		/// </param>
+		/// <param name="reader">used to compute IDF which can be used to a) score selected fragments better 
+		/// b) use graded highlights eg set font color intensity
+		/// </param>
+		/// <param name="fieldName">the field on which Inverse Document Frequency (IDF) calculations are based
+		/// </param>
+		public QueryScorer(Query query, IndexReader reader, System.String fieldName):this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
+		{
+		}
+		
+		/// <summary> Builds the term lookup table from pre-weighted terms, keeping the
+		/// highest weight when the same term text appears more than once.
+		/// </summary>
+		public QueryScorer(WeightedTerm[] weightedTerms)
+		{
+			termsToFind = new System.Collections.Hashtable();
+			for (int i = 0; i < weightedTerms.Length; i++)
+			{
+				WeightedTerm existingTerm = (WeightedTerm) termsToFind[weightedTerms[i].term];
+				if ((existingTerm == null) || (existingTerm.weight < weightedTerms[i].weight))
+				{
+					//if a term is defined more than once, always use the highest scoring weight
+					termsToFind[weightedTerms[i].term] = weightedTerms[i];
+					maxTermWeight = System.Math.Max(maxTermWeight, weightedTerms[i].GetWeight());
+				}
+			}
+		}
+		
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.FragmentScorer#startFragment(Lucene.Net.Highlight.TextFragment)
+		*/
+		public virtual void  StartFragment(TextFragment newFragment)
+		{
+			// Reset per-fragment state; the fragment's score accumulates in GetTokenScore.
+			uniqueTermsInFragment = new System.Collections.Hashtable();
+			currentTextFragment = newFragment;
+			totalScore = 0;
+		}
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.FragmentScorer#scoreToken(org.apache.lucene.analysis.Token)
+		*/
+		public virtual float GetTokenScore(Token token)
+		{
+			System.String termText = token.TermText();
+			
+			WeightedTerm queryTerm = (WeightedTerm) termsToFind[termText];
+			if (queryTerm == null)
+			{
+				//not a query term - return
+				return 0;
+			}
+			//found a query term - is it unique in this doc?
+			// Only the first occurrence of a term contributes to the fragment score,
+			// but every occurrence returns its weight for mark-up purposes.
+			if (!uniqueTermsInFragment.Contains(termText))
+			{
+				totalScore += queryTerm.GetWeight();
+				uniqueTermsInFragment.Add(termText, termText);
+			}
+			return queryTerm.GetWeight();
+		}
+		
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.FragmentScorer#endFragment(Lucene.Net.Highlight.TextFragment)
+		*/
+		public virtual float GetFragmentScore()
+		{
+			return totalScore;
+		}
+		
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.FragmentScorer#allFragmentsProcessed()
+		*/
+		public virtual void  AllFragmentsProcessed()
+		{
+			//this class has no special operations to perform at end of processing
+		}
+		
+		/// <summary> </summary>
+		/// <returns> The highest weighted term (useful for passing to GradientFormatter to set
+		/// top end of coloring scale.  
+		/// </returns>
+		public virtual float GetMaxTermWeight()
+		{
+			return maxTermWeight;
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/QueryTermExtractor.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/QueryTermExtractor.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/QueryTermExtractor.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/QueryTermExtractor.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using Term = Lucene.Net.Index.Term;
+using Query = Lucene.Net.Search.Query;
+
+namespace Lucene.Net.Highlight
+{
+	
+	/// <summary> Utility class used to extract the terms used in a query, plus any weights.
+	/// This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes
+	/// so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of 
+	/// expanded terms. 
+	/// 
+	/// </summary>
+	public sealed class QueryTermExtractor
+	{
+		
+		/// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms
+		/// 
+		/// </summary>
+		/// <param name="query">     Query to extract term texts from
+		/// </param>
+		/// <returns> an array of the terms used in a query, plus their weights.
+		/// </returns>
+		public static WeightedTerm[] GetTerms(Query query)
+		{
+			return GetTerms(query, false);
+		}
+		
+		/// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms,
+		/// scaling each term's weight by its Inverse Document Frequency (IDF).
+		/// 
+		/// </summary>
+		/// <param name="query">     Query to extract term texts from
+		/// </param>
+		/// <param name="reader">used to compute IDF which can be used to a) score selected fragments better 
+		/// b) use graded highlights eg changing intensity of font color
+		/// </param>
+		/// <param name="fieldName">the field on which Inverse Document Frequency (IDF) calculations are based
+		/// </param>
+		/// <returns> an array of the terms used in a query, plus their weights.
+		/// </returns>
+		public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, System.String fieldName)
+		{
+			WeightedTerm[] terms = GetTerms(query, false, fieldName);
+			int totalNumDocs = reader.NumDocs();
+			for (int i = 0; i < terms.Length; i++)
+			{
+				try
+				{
+					int docFreq = reader.DocFreq(new Term(fieldName, terms[i].term));
+					//IDF algorithm taken from DefaultSimilarity class
+					float idf = (float) (System.Math.Log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
+					terms[i].weight *= idf;
+				}
+				catch (System.IO.IOException)
+				{
+					//ignore - a term whose IDF cannot be computed keeps its original weight
+				}
+			}
+			return terms;
+		}
+		
+		/// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms
+		/// 
+		/// </summary>
+		/// <param name="query">     Query to extract term texts from
+		/// </param>
+		/// <param name="prohibited"><code>true</code> to extract "prohibited" terms, too
+		/// </param>
+		/// <param name="fieldName"> The fieldName used to filter query terms
+		/// </param>
+		/// <returns> an array of the terms used in a query, plus their weights.
+		/// </returns>
+		public static WeightedTerm[] GetTerms(Query query, bool prohibited, System.String fieldName)
+		{
+			System.Collections.Hashtable terms = new System.Collections.Hashtable();
+			if (fieldName != null)
+			{
+				// Interned so the private overload can compare field names by reference.
+				fieldName = String.Intern(fieldName);
+			}
+			GetTerms(query, terms, prohibited, fieldName);
+
+			WeightedTerm[] result = new WeightedTerm[terms.Count];
+			int i = 0;
+			foreach (System.Object item in terms.Values)
+			{
+				result[i++] = (WeightedTerm) item;
+			}
+			return (result);
+		}
+		
+		/// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms
+		/// 
+		/// </summary>
+		/// <param name="query">     Query to extract term texts from
+		/// </param>
+		/// <param name="prohibited"><code>true</code> to extract "prohibited" terms, too
+		/// </param>
+		/// <returns> an array of the terms used in a query, plus their weights.
+		/// </returns>
+		public static WeightedTerm[] GetTerms(Query query, bool prohibited)
+		{
+			return GetTerms(query, prohibited, null);
+		}
+		
+		//fieldname MUST be interned prior to this call
+		private static void  GetTerms(Query query, System.Collections.Hashtable terms, bool prohibited, System.String fieldName)
+		{
+			try
+			{
+				System.Collections.Hashtable nonWeightedTerms = new System.Collections.Hashtable();
+				query.ExtractTerms(nonWeightedTerms);
+
+				// Iterate the terms the query just extracted - NOT the output hashtable.
+				// Iterating 'terms' here would both find nothing (it starts empty) and
+				// throw once modified during enumeration.
+				foreach (Term term in nonWeightedTerms.Values)
+				{
+					if ((fieldName == null) || (term.Field() == fieldName))
+					{
+						WeightedTerm temp = new WeightedTerm(query.GetBoost(), term.Text());
+						terms[temp] = temp;
+					}
+				}
+			}
+			catch (System.NotSupportedException)
+			{
+				//this is non-fatal for our purposes - some Query types do not support ExtractTerms
+			}
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/Scorer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/Scorer.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/Scorer.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/Scorer.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Highlight
+{
+	
+	/// <summary> Adds to the score for a fragment based on its tokens</summary>
+	/// <author>  mark@searcharea.co.uk
+	/// </author>
+	public interface Scorer
+	{
+		/// <summary> called when a new fragment is started for consideration</summary>
+		/// <param name="newFragment">the fragment about to be scored
+		/// </param>
+		void  StartFragment(TextFragment newFragment);
+		
+		/// <summary> Called for each token in the current fragment</summary>
+		/// <param name="token">The token to be scored
+		/// </param>
+		/// <returns> a score which is passed to the Highlighter class to influence the mark-up of the text
+		/// (this return value is NOT used to score the fragment)
+		/// </returns>
+		float GetTokenScore(Token token);
+		
+		
+		/// <summary> Called when the highlighter has no more tokens for the current fragment - the scorer returns
+		/// the weighting it has derived for the most recent fragment, typically based on the tokens
+		/// passed to getTokenScore(). 
+		/// 
+		/// </summary>
+		float GetFragmentScore();
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleFragmenter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/SimpleFragmenter.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleFragmenter.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleFragmenter.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Highlight
+{
+	
+	/// <summary> {@link Fragmenter} implementation which breaks text up into same-size 
+	/// fragments with no concerns over spotting sentence boundaries.
+	/// </summary>
+	/// <author>  mark@searcharea.co.uk
+	/// </author>
+	public class SimpleFragmenter : Fragmenter
+	{
+		private const int DEFAULT_FRAGMENT_SIZE = 100;
+		private int currentNumFrags;
+		private int fragmentSize;
+		
+		
+		/// <summary> Creates a fragmenter using the default fragment size (100 bytes).</summary>
+		public SimpleFragmenter() : this(DEFAULT_FRAGMENT_SIZE)
+		{
+		}
+		
+		
+		/// <summary> </summary>
+		/// <param name="fragmentSize">size in bytes of each fragment
+		/// </param>
+		public SimpleFragmenter(int fragmentSize)
+		{
+			this.fragmentSize = fragmentSize;
+		}
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.TextFragmenter#start(java.lang.String)
+		*/
+		public virtual void  Start(System.String originalText)
+		{
+			// Begin counting fragments afresh for this piece of text.
+			currentNumFrags = 1;
+		}
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
+		*/
+		public virtual bool IsNewFragment(Token token)
+		{
+			// The next fragment boundary sits at the next multiple of the fragment size.
+			int boundary = fragmentSize * currentNumFrags;
+			if (token.EndOffset() >= boundary)
+			{
+				currentNumFrags++;
+				return true;
+			}
+			return false;
+		}
+		
+		/// <returns> size in bytes of each fragment
+		/// </returns>
+		public virtual int GetFragmentSize()
+		{
+			return fragmentSize;
+		}
+		
+		/// <param name="size">size in bytes of each fragment
+		/// </param>
+		public virtual void  SetFragmentSize(int size)
+		{
+			fragmentSize = size;
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLEncoder.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLEncoder.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLEncoder.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2005 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Highlight
+{
+	/// <summary> Simple {@link Encoder} implementation to escape text for HTML output</summary>
+	/// <author>  Nicko Cadell
+	/// 
+	/// </author>
+	public class SimpleHTMLEncoder : Encoder
+	{
+		public SimpleHTMLEncoder()
+		{
+		}
+		
+		/// <summary> Escapes the supplied text for safe inclusion in HTML output.</summary>
+		public virtual System.String EncodeText(System.String originalText)
+		{
+			return HtmlEncode(originalText);
+		}
+		
+		/// <summary> Encode string into HTML: the double quote, ampersand, less-than and
+		/// greater-than characters become named entities, characters outside the ASCII
+		/// range become numeric character references, and everything else passes
+		/// through unchanged.
+		/// </summary>
+		public static System.String HtmlEncode(System.String plainText)
+		{
+			if (plainText == null || plainText.Length == 0)
+			{
+				return "";
+			}
+			
+			System.Text.StringBuilder encoded = new System.Text.StringBuilder(plainText.Length);
+			
+			foreach (char ch in plainText)
+			{
+				if (ch == '"')
+				{
+					encoded.Append("&quot;");
+				}
+				else if (ch == '&')
+				{
+					encoded.Append("&amp;");
+				}
+				else if (ch == '<')
+				{
+					encoded.Append("&lt;");
+				}
+				else if (ch == '>')
+				{
+					encoded.Append("&gt;");
+				}
+				else if (ch < 128)
+				{
+					// Plain ASCII - no escaping required.
+					encoded.Append(ch);
+				}
+				else
+				{
+					// Non-ASCII characters become numeric character references.
+					encoded.Append("&#").Append((int) ch).Append(";");
+				}
+			}
+			
+			return encoded.ToString();
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLFormatter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLFormatter.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLFormatter.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SimpleHTMLFormatter.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Highlight
+{
+	/// <summary> Simple {@link Formatter} implementation to highlight terms with a pre and post tag</summary>
+	/// <author>  MAHarwood
+	/// 
+	/// </author>
+	public class SimpleHTMLFormatter : Formatter
+	{
+		internal System.String preTag;
+		internal System.String postTag;
+		
+		
+		/// <summary> Wraps highlighted terms with the supplied opening and closing tags.</summary>
+		public SimpleHTMLFormatter(System.String preTag, System.String postTag)
+		{
+			this.preTag = preTag;
+			this.postTag = postTag;
+		}
+		
+		/// <summary> Default constructor uses HTML: &lt;B&gt; tags to markup terms
+		/// 
+		/// 
+		/// </summary>
+		public SimpleHTMLFormatter() : this("<B>", "</B>")
+		{
+		}
+		
+		/* (non-Javadoc)
+		* @see Lucene.Net.Highlight.Formatter#highlightTerm(java.lang.String, Lucene.Net.Highlight.TokenGroup)
+		*/
+		public virtual System.String HighlightTerm(System.String originalText, TokenGroup tokenGroup)
+		{
+			// Text that did not score is passed through untouched.
+			if (!(tokenGroup.GetTotalScore() > 0))
+			{
+				return originalText;
+			}
+			System.Text.StringBuilder markedUp = new System.Text.StringBuilder();
+			markedUp.Append(preTag);
+			markedUp.Append(originalText);
+			markedUp.Append(postTag);
+			return markedUp.ToString();
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SpanGradientFormatter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/SpanGradientFormatter.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SpanGradientFormatter.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/SpanGradientFormatter.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Highlight
+{
+	/// <summary> Formats text with different color intensity depending on the score of the
+	/// term using the span tag.  GradientFormatter uses a bgcolor argument to the font tag which
+	/// doesn't work in Mozilla, thus this class.
+	/// 
+	/// </summary>
+	/// <seealso cref="GradientFormatter">
+	/// </seealso>
+	/// <author>  David Spencer dave@searchmorph.com
+	/// </author>
+	
+	public class SpanGradientFormatter:GradientFormatter
+	{
+		public SpanGradientFormatter(float maxScore, System.String minForegroundColor, System.String maxForegroundColor, System.String minBackgroundColor, System.String maxBackgroundColor):base(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor)
+		{
+		}
+		
+		
+		
+		/// <summary> Wraps scoring text in a span tag whose foreground/background colors are
+		/// scaled by the group's total score; zero-scoring text is returned unchanged.
+		/// </summary>
+		public override System.String HighlightTerm(System.String originalText, TokenGroup tokenGroup)
+		{
+			// The original code tested GetTotalScore() == 0 twice in a row; one check suffices.
+			float score = tokenGroup.GetTotalScore();
+			if (score == 0)
+			{
+				return originalText;
+			}
+			
+			// try to size sb correctly
+			System.Text.StringBuilder sb = new System.Text.StringBuilder(originalText.Length + EXTRA);
+			
+			sb.Append("<span style=\"");
+			if (highlightForeground)
+			{
+				sb.Append("color: ");
+				sb.Append(GetForegroundColorString(score));
+				sb.Append("; ");
+			}
+			if (highlightBackground)
+			{
+				sb.Append("background: ");
+				sb.Append(GetBackgroundColorString(score));
+				sb.Append("; ");
+			}
+			sb.Append("\">");
+			sb.Append(originalText);
+			sb.Append("</span>");
+			return sb.ToString();
+		}
+		
+		// guess how much extra text we'll add to the text we're highlighting to try to avoid a  StringBuffer resize
+		private const System.String TEMPLATE = "<span style=\"background: #EEEEEE; color: #000000;\">...</span>";
+		private static readonly int EXTRA = TEMPLATE.Length;
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TextFragment.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/TextFragment.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TextFragment.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TextFragment.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Highlight
+{
+	/// <summary> Low-level class used to record information about a section of a document 
+	/// with a score.
+	/// </summary>
+	/// <author>  MAHarwood
+	/// 
+	/// 
+	/// </author>
+	public class TextFragment
+	{
+		// The whole marked-up document text; this fragment is the slice [textStartPos, textEndPos).
+		internal System.Text.StringBuilder markedUpText;
+		internal int fragNum;
+		internal int textStartPos;
+		// NOTE(review): textEndPos is not set by the constructor - it appears to be
+		// assigned by package-internal callers (see Merge below); confirm against Highlighter.
+		internal int textEndPos;
+		internal float score;
+		
+		public TextFragment(System.Text.StringBuilder markedUpText, int textStartPos, int fragNum)
+		{
+			this.markedUpText = markedUpText;
+			this.textStartPos = textStartPos;
+			this.fragNum = fragNum;
+		}
+		internal virtual void  SetScore(float score)
+		{
+			this.score = score;
+		}
+		public virtual float GetScore()
+		{
+			return score;
+		}
+		/// <param name="frag2">Fragment to be merged into this one
+		/// </param>
+		public virtual void  Merge(TextFragment frag2)
+		{
+			textEndPos = frag2.textEndPos;
+			score = System.Math.Max(score, frag2.score);
+		}
+		/// <param name="fragment">
+		/// </param>
+		/// <returns> true if this fragment follows the one passed
+		/// </returns>
+		public virtual bool Follows(TextFragment fragment)
+		{
+			return textStartPos == fragment.textEndPos;
+		}
+		
+		/// <returns> the fragment sequence number
+		/// </returns>
+		public virtual int GetFragNum()
+		{
+			return fragNum;
+		}
+		
+		/* Returns the marked-up text for this text fragment 
+		*/
+		public override System.String ToString()
+		{
+			// .NET's StringBuilder.ToString(startIndex, length) takes a LENGTH as its
+			// second argument, unlike Java's StringBuffer.substring(start, end) which
+			// this code was ported from - so the end offset must be converted to a length.
+			return markedUpText.ToString(textStartPos, textEndPos - textStartPos);
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TokenGroup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/TokenGroup.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TokenGroup.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TokenGroup.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Highlight
+{
+	
+	/// <summary> One, or several overlapping tokens, along with the score(s) and the
+	/// scope of the original text
+	/// </summary>
+	/// <author>  MAHarwood
+	/// </author>
+	public class TokenGroup
+	{
+		
+		private const int MAX_NUM_TOKENS_PER_GROUP = 50;
+		// Parallel arrays: tokens[i] was scored scores[i]; only the first numTokens entries are valid.
+		internal Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
+		internal float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
+		internal int numTokens = 0;
+		// Character offsets of the group within the original text - the union of all member token offsets.
+		internal int startOffset = 0;
+		internal int endOffset = 0;
+		
+		
+		// Records a token and its score; tokens beyond MAX_NUM_TOKENS_PER_GROUP are silently dropped.
+		internal virtual void  AddToken(Token token, float score)
+		{
+			if (numTokens < MAX_NUM_TOKENS_PER_GROUP)
+			{
+				if (numTokens == 0)
+				{
+					startOffset = token.StartOffset();
+					endOffset = token.EndOffset();
+				}
+				else
+				{
+					// Grow the group's span to cover the new token.
+					startOffset = Math.Min(startOffset, token.StartOffset());
+					endOffset = Math.Max(endOffset, token.EndOffset());
+				}
+				tokens[numTokens] = token;
+				scores[numTokens] = score;
+				numTokens++;
+			}
+		}
+		
+		// True when the token starts at or after this group's end, i.e. does not overlap it.
+		internal virtual bool IsDistinct(Token token)
+		{
+			return token.StartOffset() >= endOffset;
+		}
+		
+		
+		// Empties the group so it can be reused for the next run of overlapping tokens.
+		internal virtual void  Clear()
+		{
+			numTokens = 0;
+		}
+		
+		/// <summary> </summary>
+		/// <param name="index">a value between 0 and numTokens -1
+		/// </param>
+		/// <returns> the "n"th token
+		/// </returns>
+		public virtual Token GetToken(int index)
+		{
+			return tokens[index];
+		}
+		
+		/// <summary> </summary>
+		/// <param name="index">a value between 0 and numTokens -1
+		/// </param>
+		/// <returns> the "n"th score
+		/// </returns>
+		public virtual float GetScore(int index)
+		{
+			return scores[index];
+		}
+		
+		/// <returns> the end position in the original text
+		/// </returns>
+		public virtual int GetEndOffset()
+		{
+			return endOffset;
+		}
+		
+		/// <returns> the number of tokens in this group
+		/// </returns>
+		public virtual int GetNumTokens()
+		{
+			return numTokens;
+		}
+		
+		/// <returns> the start position in the original text
+		/// </returns>
+		public virtual int GetStartOffset()
+		{
+			return startOffset;
+		}
+		
+		/// <returns> all tokens' scores summed up
+		/// </returns>
+		public virtual float GetTotalScore()
+		{
+			float total = 0;
+			for (int i = 0; i < numTokens; i++)
+			{
+				total += scores[i];
+			}
+			return total;
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TokenSources.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/TokenSources.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TokenSources.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/TokenSources.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+* Created on 28-Oct-2004
+*/
+using System;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Token = Lucene.Net.Analysis.Token;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using Document = Lucene.Net.Documents.Document;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using TermFreqVector = Lucene.Net.Index.TermFreqVector;
+using TermPositionVector = Lucene.Net.Index.TermPositionVector;
+using TermVectorOffsetInfo = Lucene.Net.Index.TermVectorOffsetInfo;
+
+namespace Lucene.Net.Highlight
+{
+	
+	/// <summary> Hides implementation issues associated with obtaining a TokenStream for use with
+	/// the highlighter - can obtain from TermFreqVectors with offsets and (optionally) positions or
+	/// from Analyzer class reparsing the stored content. 
+	/// </summary>
+	/// <author>  maharwood
+	/// </author>
+	public class TokenSources
+	{
+		/// <summary>A TokenStream that replays a pre-built array of Tokens in array order.</summary>
+		public class StoredTokenStream : TokenStream
+		{
+			internal Token[] tokens;
+			internal int currentToken = 0;
+			internal StoredTokenStream(Token[] tokens)
+			{
+				this.tokens = tokens;
+			}
+			/// <summary>Returns the next stored token, or null once the array is exhausted.</summary>
+			public override Token Next()
+			{
+				if (currentToken >= tokens.Length)
+				{
+					return null;
+				}
+				return tokens[currentToken++];
+			}
+		}
+		/// <summary>Orders Tokens by ascending start offset; used to rebuild the original token
+		/// sequence when position data is absent or not guaranteed contiguous.</summary>
+		private class AnonymousClassComparator : System.Collections.IComparer
+		{
+			public virtual int Compare(System.Object o1, System.Object o2)
+			{
+				Token t1 = (Token) o1;
+				Token t2 = (Token) o2;
+				if (t1.StartOffset() > t2.StartOffset())
+					return 1;
+				if (t1.StartOffset() < t2.StartOffset())
+					return - 1;
+				return 0;
+			}
+		}
+		/// <summary> A convenience method that tries a number of approaches to getting a token stream.
+		/// The cost of finding there are no termVectors in the index is minimal (1000 invocations still 
+		/// registers 0 ms). So this "lazy" (flexible?) approach to coding is probably acceptable
+		/// </summary>
+		/// <param name="reader">index reader used to fetch the term vector or stored field
+		/// </param>
+		/// <param name="docId">document number of interest
+		/// </param>
+		/// <param name="field">name of the field whose tokens are required
+		/// </param>
+		/// <param name="analyzer">fallback analyzer used to re-parse stored content when no
+		/// TermPositionVector is available
+		/// </param>
+		/// <returns> null if field not stored correctly 
+		/// </returns>
+		/// <throws>  IOException </throws>
+		public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
+		{
+			TokenStream ts = null;
+			
+			TermFreqVector tfv = (TermFreqVector) reader.GetTermFreqVector(docId, field);
+			if (tfv != null)
+			{
+				if (tfv is TermPositionVector)
+				{
+					ts = GetTokenStream((TermPositionVector) tfv);
+				}
+			}
+			//No token info stored so fall back to analyzing raw content
+			if (ts == null)
+			{
+				ts = GetTokenStream(reader, docId, field, analyzer);
+			}
+			return ts;
+		}
+		
+		
+		/// <summary>Builds a token stream from the vector without assuming contiguous positions.</summary>
+		public static TokenStream GetTokenStream(TermPositionVector tpv)
+		{
+			//assumes the worst and makes no assumptions about token position sequences.
+			return GetTokenStream(tpv, false);
+		}
+		/// <summary> Low level api.
+		/// Returns a token stream or null if no offset info available in index.
+		/// This can be used to feed the highlighter with a pre-parsed token stream 
+		/// 
+		/// In my tests the speeds to recreate 1000 token streams using this method are:
+		/// - with TermVector offset only data stored - 420  milliseconds 
+		/// - with TermVector offset AND position data stored - 271 milliseconds
+		/// (nb timings for TermVector with position data are based on a tokenizer with contiguous
+		/// positions - no overlaps or gaps)
+		/// The cost of not using TermPositionVector to store
+		/// pre-parsed content and using an analyzer to re-parse the original content: 
+		/// - reanalyzing the original content - 980 milliseconds
+		/// 
+		/// The re-analyze timings will typically vary depending on -
+		/// 1) The complexity of the analyzer code (timings above were using a 
+		/// stemmer/lowercaser/stopword combo)
+		/// 2) The  number of other fields (Lucene reads ALL fields off the disk 
+		/// when accessing just one document field - can cost dear!)
+		/// 3) Use of compression on field storage - could be faster cos of compression (less disk IO)
+		/// or slower (more CPU burn) depending on the content.
+		/// 
+		/// </summary>
+		/// <param name="tpv">term position vector holding offsets (and optionally positions)
+		/// </param>
+		/// <param name="tokenPositionsGuaranteedContiguous">true if the token position numbers have no overlaps or gaps. If looking
+		/// to eke out the last drops of performance, set to true. If in doubt, set to false.
+		/// </param>
+		public static TokenStream GetTokenStream(TermPositionVector tpv, bool tokenPositionsGuaranteedContiguous)
+		{
+			//an object used to iterate across an array of tokens
+			//code to reconstruct the original sequence of Tokens
+			System.String[] terms = tpv.GetTerms();
+			int[] freq = tpv.GetTermFrequencies();
+			//total token count = sum of per-term frequencies
+			int totalTokens = 0;
+			for (int t = 0; t < freq.Length; t++)
+			{
+				totalTokens += freq[t];
+			}
+			Token[] tokensInOriginalOrder = new Token[totalTokens];
+			System.Collections.ArrayList unsortedTokens = null;
+			for (int t = 0; t < freq.Length; t++)
+			{
+				TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
+				if (offsets == null)
+				{
+					//no offset data stored - cannot rebuild a stream, per the contract above
+					return null;
+				}
+				
+				int[] pos = null;
+				if (tokenPositionsGuaranteedContiguous)
+				{
+					//try get the token position info to speed up assembly of tokens into sorted sequence
+					pos = tpv.GetTermPositions(t);
+				}
+				if (pos == null)
+				{
+					//tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
+					if (unsortedTokens == null)
+					{
+						unsortedTokens = new System.Collections.ArrayList();
+					}
+					for (int tp = 0; tp < offsets.Length; tp++)
+					{
+						unsortedTokens.Add(new Token(terms[t], offsets[tp].GetStartOffset(), offsets[tp].GetEndOffset()));
+					}
+				}
+				else
+				{
+					//We have positions stored and a guarantee that the token position information is contiguous
+					
+					// This may be fast BUT wont work if Tokenizers used which create >1 token in same position or
+					// creates jumps in position numbers - this code would fail under those circumstances
+					
+					//tokens stored with positions - can use this to index straight into sorted array
+					for (int tp = 0; tp < pos.Length; tp++)
+					{
+						tokensInOriginalOrder[pos[tp]] = new Token(terms[t], offsets[tp].GetStartOffset(), offsets[tp].GetEndOffset());
+					}
+				}
+			}
+			//If the field has been stored without position data we must perform a sort        
+			if (unsortedTokens != null)
+			{
+				tokensInOriginalOrder = (Token[]) unsortedTokens.ToArray(typeof(Token));
+				Array.Sort(tokensInOriginalOrder, new AnonymousClassComparator());
+			}
+			return new StoredTokenStream(tokensInOriginalOrder);
+		}
+		
+		/// <summary>Builds a token stream from a stored TermPositionVector; throws if the field
+		/// has no term vector or the vector lacks position data.</summary>
+		public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field)
+		{
+			TermFreqVector tfv = (TermFreqVector) reader.GetTermFreqVector(docId, field);
+			if (tfv == null)
+			{
+				// NOTE(review): message is missing a space after docId (renders as "#5does not") - candidate fix
+				throw new System.ArgumentException(field + " in doc #" + docId + "does not have any term position data stored");
+			}
+			if (tfv is TermPositionVector)
+			{
+				// NOTE(review): re-fetches the vector from the reader instead of casting tfv - redundant second lookup
+				TermPositionVector tpv = (TermPositionVector) reader.GetTermFreqVector(docId, field);
+				return GetTokenStream(tpv);
+			}
+			// NOTE(review): same missing space after docId as above
+			throw new System.ArgumentException(field + " in doc #" + docId + "does not have any term position data stored");
+		}
+		
+		//convenience method - re-analyzes the stored field content with the supplied analyzer
+		public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
+		{
+			Document doc = reader.Document(docId);
+			System.String contents = doc.Get(field);
+			if (contents == null)
+			{
+				throw new System.ArgumentException("Field " + field + " in document #" + docId + " is not stored and cannot be analyzed");
+			}
+			return analyzer.TokenStream(field, new System.IO.StringReader(contents));
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/WeightedTerm.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Highlighter.Net/WeightedTerm.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/WeightedTerm.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Highlighter.Net/WeightedTerm.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Highlight
+{
+	/// <summary>Lightweight class to hold term and a weight value used for scoring this term </summary>
+	/// <author>  Mark Harwood
+	/// </author>
+	public class WeightedTerm
+	{
+		internal float weight; // multiplier
+		internal System.String term; //stemmed form
+		/// <summary>Creates a term/weight pair.</summary>
+		/// <param name="weight">score multiplier applied when this term is matched</param>
+		/// <param name="term">the term text (stemmed form)</param>
+		public WeightedTerm(float weight, System.String term)
+		{
+			this.weight = weight;
+			this.term = term;
+		}
+		
+		
+		/// <returns> the term value (stemmed)
+		/// </returns>
+		public virtual System.String GetTerm()
+		{
+			return term;
+		}
+		
+		/// <returns> the weight associated with this term
+		/// </returns>
+		public virtual float GetWeight()
+		{
+			return weight;
+		}
+		
+		/// <param name="term">the term value (stemmed)
+		/// </param>
+		public virtual void  SetTerm(System.String term)
+		{
+			this.term = term;
+		}
+		
+		/// <param name="weight">the weight associated with this term
+		/// </param>
+		public virtual void  SetWeight(float weight)
+		{
+			this.weight = weight;
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/LICENSE.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/LICENSE.txt?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/LICENSE.txt (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/LICENSE.txt Wed Dec 27 07:05:35 2006
@@ -0,0 +1,15 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Test/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Test/AssemblyInfo.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Test/AssemblyInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Test/AssemblyInfo.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,58 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+//
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+//
+[assembly: AssemblyTitle("")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("")]
+[assembly: AssemblyCopyright("")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]		
+
+//
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Revision and Build Numbers 
+// by using the '*' as shown below:
+
+[assembly: AssemblyVersion("2.0.0.000")]
+
+//
+// In order to sign your assembly you must specify a key to use. Refer to the 
+// Microsoft .NET Framework documentation for more information on assembly signing.
+//
+// Use the attributes below to control which key is used for signing. 
+//
+// Notes: 
+//   (*) If no key is specified, the assembly is not signed.
+//   (*) KeyName refers to a key that has been installed in the Crypto Service
+//       Provider (CSP) on your machine. KeyFile refers to a file which contains
+//       a key.
+//   (*) If the KeyFile and the KeyName values are both specified, the 
+//       following processing occurs:
+//       (1) If the KeyName can be found in the CSP, that key is used.
+//       (2) If the KeyName does not exist and the KeyFile does exist, the key 
+//           in the KeyFile is installed into the CSP and used.
+//   (*) In order to create a KeyFile, you can use the sn.exe (Strong Name) utility.
+//       When specifying the KeyFile, the location of the KeyFile should be
+//       relative to the project output directory which is
+//       %Project Directory%\obj\<configuration>. For example, if your KeyFile is
+//       located in the project directory, you would specify the AssemblyKeyFile 
+//       attribute as [assembly: AssemblyKeyFile("..\\..\\mykey.snk")]
+//   (*) Delay Signing is an advanced option - see the Microsoft .NET Framework
+//       documentation for more information on this.
+//
+[assembly: AssemblyDelaySign(false)]
+[assembly: AssemblyKeyFile("")]
+[assembly: AssemblyKeyName("")]

Added: incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Test/HighlighterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Highlighter.Net/Test/HighlighterTest.cs?view=auto&rev=490508
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Test/HighlighterTest.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Highlighter.Net/Test/HighlighterTest.cs Wed Dec 27 07:05:35 2006
@@ -0,0 +1,951 @@
+/*
+ * Copyright 2002-2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using NUnit.Framework;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using LowerCaseTokenizer = Lucene.Net.Analysis.LowerCaseTokenizer;
+using Token = Lucene.Net.Analysis.Token;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
+using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+using IndexReader = Lucene.Net.Index.IndexReader;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using Term = Lucene.Net.Index.Term;
+using ParseException = Lucene.Net.QueryParsers.ParseException;
+using QueryParser = Lucene.Net.QueryParsers.QueryParser;
+using FilteredQuery = Lucene.Net.Search.FilteredQuery;
+using Hits = Lucene.Net.Search.Hits;
+using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+using MultiSearcher = Lucene.Net.Search.MultiSearcher;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using Query = Lucene.Net.Search.Query;
+using RangeFilter = Lucene.Net.Search.RangeFilter;
+using Searcher = Lucene.Net.Search.Searcher;
+using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
+using RAMDirectory = Lucene.Net.Store.RAMDirectory;
+using Formatter = Lucene.Net.Highlight.Formatter;
+using Highlighter = Lucene.Net.Highlight.Highlighter;
+using NullFragmenter = Lucene.Net.Highlight.NullFragmenter;
+using QueryScorer = Lucene.Net.Highlight.QueryScorer;
+using Scorer = Lucene.Net.Highlight.Scorer;
+using SimpleFragmenter = Lucene.Net.Highlight.SimpleFragmenter;
+using SimpleHTMLEncoder = Lucene.Net.Highlight.SimpleHTMLEncoder;
+using SimpleHTMLFormatter = Lucene.Net.Highlight.SimpleHTMLFormatter;
+using TextFragment = Lucene.Net.Highlight.TextFragment;
+using TokenGroup = Lucene.Net.Highlight.TokenGroup;
+using WeightedTerm = Lucene.Net.Highlight.WeightedTerm;
+
+namespace Lucene.Net.Search.Highlight
+{
+	
+	/// <summary> JUnit Test for Highlighter class.</summary>
+	/// <author>  mark@searcharea.co.uk
+	/// </author>
+	[TestFixture]
+    public class HighlighterTest : Formatter
+	{
+        /*  // {{Aroush-2.0}} Fix-me
+		private class AnonymousClassScorer : Scorer
+		{
+			public AnonymousClassScorer(HighlighterTest enclosingInstance)
+			{
+				InitBlock(enclosingInstance);
+			}
+			private void  InitBlock(HighlighterTest enclosingInstance)
+			{
+				this.enclosingInstance = enclosingInstance;
+			}
+			private HighlighterTest enclosingInstance;
+			public HighlighterTest Enclosing_Instance
+			{
+				get
+				{
+					return enclosingInstance;
+				}
+				
+			}
+			public virtual void  StartFragment(TextFragment newFragment)
+			{
+			}
+			public virtual float GetTokenScore(Token token)
+			{
+				return 0;
+			}
+			public virtual float GetFragmentScore()
+			{
+				return 1;
+			}
+
+            public override bool SkipTo(int target)
+            {
+                return false;
+            }
+            public override Explanation Explain(int doc)
+            {
+                return null;
+            }
+            public override float Score()
+            {
+                return 0;
+            }
+            public override bool Next()
+            {
+                return false;
+            }
+            public override int Doc()
+            {
+                return -1;
+            }
+		}
+        */
+		private IndexReader reader;
+		// Field indexed, searched and highlighted by every test in this fixture.
+		private const System.String FIELD_NAME = "contents";
+		private Query query;
+		internal RAMDirectory ramDir;
+		public Searcher searcher = null;
+		public Hits hits = null;
+		// Incremented via the Formatter callback each time a query term is highlighted.
+		internal int numHighlights = 0;
+		internal Analyzer analyzer = new StandardAnalyzer();
+		
+		// Sample documents indexed by the fixture; several mention "Kennedy" (one misspelled).
+		internal System.String[] texts = new System.String[]{"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", "JFK has been shot", "John Kennedy has been shot", "This text has a typo in referring to Keneddy"};
+		
+		/// <summary> Constructor for HighlightExtractorTest.</summary>
+		/// <param name="">arg0
+		/// </param>
+		//public HighlighterTest(System.String arg0)
+		//{
+		//}
+		
+		/// <summary>Smoke test: fragments every hit for "Kennedy"; asserts nothing, just checks no exception.</summary>
+		[Test]
+        public virtual void  TestSimpleHighlighter()
+		{
+			DoSearching("Kennedy");
+			Highlighter highlighter = new Highlighter(new QueryScorer(query));
+			highlighter.SetTextFragmenter(new SimpleFragmenter(40));
+			int maxNumFragmentsRequired = 2;
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				
+				System.String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+				System.Console.Out.WriteLine("\t" + result);
+			}
+			//Not sure we can assert anything here - just running to check we dont throw any exceptions
+		}
+		
+		
+		// The next five tests run DoStandardHighlights() against different query syntaxes
+		// (plain term, fuzzy, wildcard, mid-wildcard, range) and assert the highlight count.
+		[Test]
+		public virtual void  TestGetBestFragmentsSimpleQuery()
+		{
+			DoSearching("Kennedy");
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+
+        [Test]
+        public virtual void  TestGetFuzzyFragments()
+		{
+			DoSearching("Kinnedy~");
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+		[Test]
+        public virtual void  TestGetWildCardFragments()
+		{
+			DoSearching("K?nnedy");
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+        public virtual void  TestGetMidWildCardFragments()
+		{
+			DoSearching("K*dy");
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+        public virtual void  TestGetRangeFragments()
+		{
+			DoSearching(FIELD_NAME + ":[kannedy TO kznnedy]"); //bug?needs lower case
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+		// The next six tests cover phrase, span, filtered and boolean queries; phrase/span
+		// matching highlights each term independently, hence the expected count of 2.
+		[Test]
+        public virtual void  TestGetBestFragmentsPhrase()
+		{
+			DoSearching("\"John Kennedy\"");
+			DoStandardHighlights();
+			//Currently highlights "John" and "Kennedy" separately
+			Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+        public virtual void  TestGetBestFragmentsSpan()
+		{
+			SpanQuery[] clauses = new SpanQuery[]{new SpanTermQuery(new Term("contents", "john")), new SpanTermQuery(new Term("contents", "kennedy"))};
+			
+			SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
+			DoSearching(snq);
+			DoStandardHighlights();
+			//Currently highlights "John" and "Kennedy" separately
+			Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+        public virtual void  TestGetBestFragmentsFilteredQuery()
+		{
+			RangeFilter rf = new RangeFilter("contents", "john", "john", true, true);
+			SpanQuery[] clauses = new SpanQuery[]{new SpanTermQuery(new Term("contents", "john")), new SpanTermQuery(new Term("contents", "kennedy"))};
+			SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
+			FilteredQuery fq = new FilteredQuery(snq, rf);
+			
+			DoSearching(fq);
+			DoStandardHighlights();
+			//Currently highlights "John" and "Kennedy" separately
+			Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+        public virtual void  TestGetBestFragmentsFilteredPhraseQuery()
+		{
+			RangeFilter rf = new RangeFilter("contents", "john", "john", true, true);
+			PhraseQuery pq = new PhraseQuery();
+			pq.Add(new Term("contents", "john"));
+			pq.Add(new Term("contents", "kennedy"));
+			FilteredQuery fq = new FilteredQuery(pq, rf);
+			
+			DoSearching(fq);
+			DoStandardHighlights();
+			//Currently highlights "John" and "Kennedy" separately
+			Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+		[Test]
+        public virtual void  TestGetBestFragmentsMultiTerm()
+		{
+			DoSearching("John Kenn*");
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+        public virtual void  TestGetBestFragmentsWithOr()
+		{
+			DoSearching("JFK OR Kennedy");
+			DoStandardHighlights();
+			Assert.IsTrue(numHighlights == 5, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+		/// <summary>Exercises the three single-fragment overloads (TokenStream, analyzer, and
+		/// multi-fragment variants), resetting numHighlights between each pass.</summary>
+		[Test]
+        public virtual void  TestGetBestSingleFragment()
+		{
+			DoSearching("Kennedy");
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			highlighter.SetTextFragmenter(new SimpleFragmenter(40));
+			
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				System.String result = highlighter.GetBestFragment(tokenStream, text);
+				System.Console.Out.WriteLine("\t" + result);
+			}
+			Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
+			
+			numHighlights = 0;
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				highlighter.GetBestFragment(analyzer, FIELD_NAME, text);
+			}
+			Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
+			
+			numHighlights = 0;
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				highlighter.GetBestFragments(analyzer, FIELD_NAME, text, 10);
+			}
+			Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+		/// <summary>Verifies that term weights steer fragment selection: with "hello" weighted
+		/// higher the first fragment wins; after boosting "kennedy" the selection flips.</summary>
+		[Test]
+        public virtual void  TestGetBestSingleFragmentWithWeights()
+		{
+			WeightedTerm[] wTerms = new WeightedTerm[2];
+			wTerms[0] = new WeightedTerm(10f, "hello");
+			wTerms[1] = new WeightedTerm(1f, "kennedy");
+			Highlighter highlighter = new Highlighter(new QueryScorer(wTerms));
+			TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
+			highlighter.SetTextFragmenter(new SimpleFragmenter(2));
+			
+			System.String result = highlighter.GetBestFragment(tokenStream, texts[0]).Trim();
+			Assert.IsTrue("<B>Hello</B>".Equals(result), "Failed to find best section using weighted terms. Found: [" + result + "]");
+			
+			//readjust weights
+			wTerms[1].SetWeight(50f);
+			tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
+			highlighter = new Highlighter(new QueryScorer(wTerms));
+			highlighter.SetTextFragmenter(new SimpleFragmenter(2));
+			
+			result = highlighter.GetBestFragment(tokenStream, texts[0]).Trim();
+			Assert.IsTrue("<B>kennedy</B>".Equals(result), "Failed to find best section using weighted terms. Found: " + result);
+		}
+		
+		
+		// tests a "complex" analyzer that produces multiple 
+		// overlapping tokens (synonyms injected at the same position as the original term)
+		[Test]
+        public virtual void  TestOverlapAnalyzer()
+		{
+			//UPGRADE_TODO: Class 'java.util.HashMap' was converted to 'System.Collections.Hashtable' which has a different behavior. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1073_javautilHashMap_3"'
+			System.Collections.Hashtable synonyms = new System.Collections.Hashtable();
+			synonyms["football"] = "soccer,footie";
+			Analyzer analyzer = new SynonymAnalyzer(synonyms);
+			System.String srchkey = "football";
+			
+			System.String s = "football-soccer in the euro 2004 footie competition";
+			QueryParser parser = new QueryParser("bookid", analyzer);
+			Query query = parser.Parse(srchkey);
+			
+			Highlighter highlighter = new Highlighter(new QueryScorer(query));
+			TokenStream tokenStream = analyzer.TokenStream(null, new System.IO.StringReader(s));
+			// Get 3 best fragments and separate with a "..."
+			System.String result = highlighter.GetBestFragments(tokenStream, s, 3, "...");
+			System.String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
+			Assert.IsTrue(expectedResult.Equals(result), "overlapping analyzer should handle highlights OK");
+		}
+		
+		/// <summary>Runs GetBestFragment over every hit for "Kennedy" and asserts 4 highlights.</summary>
+		[Test]
+		public virtual void  TestGetSimpleHighlight()
+		{
+			DoSearching("Kennedy");
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				
+				System.String result = highlighter.GetBestFragment(tokenStream, text);
+				System.Console.Out.WriteLine("\t" + result);
+			}
+			Assert.IsTrue(numHighlights == 4, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+		/// <summary>Checks that GetBestTextFragments returns the same fragments (count and text)
+		/// as the string-based GetBestFragments overload.</summary>
+		[Test]
+		public virtual void  TestGetTextFragments()
+		{
+			DoSearching("Kennedy");
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			highlighter.SetTextFragmenter(new SimpleFragmenter(20));
+			
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				
+				System.String[] stringResults = highlighter.GetBestFragments(tokenStream, text, 10);
+				
+				//re-create the stream: token streams are single-use
+				tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				TextFragment[] fragmentResults = highlighter.GetBestTextFragments(tokenStream, text, true, 10);
+				
+				Assert.IsTrue(fragmentResults.Length == stringResults.Length, "Failed to find correct number of text Fragments: " + fragmentResults.Length + " vs " + stringResults.Length);
+				for (int j = 0; j < stringResults.Length; j++)
+				{
+					//UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1073_javaioPrintStreamprintln_javalangObject_3"'
+					System.Console.Out.WriteLine(fragmentResults[j]);
+					Assert.IsTrue(fragmentResults[j].ToString().Equals(stringResults[j]), "Failed to find same text Fragments: " + fragmentResults[j] + " found");
+				}
+			}
+		}
+		
+        /// <summary>"meat" occurs beyond the 30-byte analysis cap, so no highlight should be produced.</summary>
+        [Test]
+		public virtual void  TestMaxSizeHighlight()
+		{
+			DoSearching("meat");
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			highlighter.SetMaxDocBytesToAnalyze(30);
+			TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(texts[0]));
+			highlighter.GetBestFragment(tokenStream, texts[0]);
+			Assert.IsTrue(numHighlights == 0, "Setting MaxDocBytesToAnalyze should have prevented " + "us from finding matches for this record: " + numHighlights + " found");
+		}
+		
+		
+		/// <summary>Demonstrates that a multi-term query left un-rewritten produces zero
+		/// highlights (QueryScorer only sees primitive terms after rewrite).</summary>
+		[Test]
+		public virtual void  TestUnRewrittenQuery()
+		{
+			//test to show how rewritten query can still be used
+			searcher = new IndexSearcher(ramDir);
+			Analyzer analyzer = new StandardAnalyzer();
+			
+			QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+			Query query = parser.Parse("JF? or Kenned*");
+			System.Console.Out.WriteLine("Searching with primitive query");
+			//forget to set this and...
+			//query=query.rewrite(reader);
+			Hits hits = searcher.Search(query);
+			
+			//create an instance of the highlighter with the tags used to surround highlighted text
+			//		QueryHighlightExtractor highlighter = new QueryHighlightExtractor(this, query, new StandardAnalyzer());
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			
+			highlighter.SetTextFragmenter(new SimpleFragmenter(40));
+			
+			int maxNumFragmentsRequired = 3;
+			
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				
+				System.String highlightedText = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+				System.Console.Out.WriteLine(highlightedText);
+			}
+			//We expect to have zero highlights if the query is multi-terms and is not rewritten!
+			Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+		public virtual void  TestNoFragments()
+		{
+			DoSearching("AnInvalidQueryWhichShouldYieldNoResults");
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			
+			for (int i = 0; i < texts.Length; i++)
+			{
+				System.String text = texts[i];
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				
+				System.String result = highlighter.GetBestFragment(tokenStream, text);
+				Assert.IsNull(result, "The highlight result should be null for text with no query terms");
+			}
+		}
+		
		/// <summary> Demonstrates creation of an XHTML compliant doc using new encoding facilities.</summary>
		/// <throws>  Exception </throws>
		[Test]
		public virtual void  TestEncoding()
		{
			// NOTE(review): the entire body is disabled pending port work (see the
			// {{Aroush-2.0}} marker) — the Java original relies on javax.xml
			// DocumentBuilder facilities whose System.Xml translation below was
			// never finished, so this test currently asserts nothing.
            /*  // {{Aroush-2.0}} Fix-me
			System.String rawDocContent = "\"Smith & sons' prices < 3 and >4\" claims article";
			//run the highlighter on the raw content (scorer does not score any tokens for 
			// highlighting but scores a single fragment for selection
			Highlighter highlighter = new Highlighter(this, new SimpleHTMLEncoder(), new AnonymousClassScorer(this));
			highlighter.SetTextFragmenter(new SimpleFragmenter(2000));
			TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(rawDocContent));
			
			System.String encodedSnippet = highlighter.GetBestFragments(tokenStream, rawDocContent, 1, "");
			//An ugly bit of XML creation:
			System.String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE html\n" + "PUBLIC \"//W3C//DTD XHTML 1.0 Transitional//EN\"\n" + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n" + "<head>\n" + "<title>My Test HTML Document</title>\n" + "</head>\n" + "<body>\n" + "<h2>" + encodedSnippet + "</h2>\n" + "</body>\n" + "</html>";
			//now an ugly built of XML parsing to test the snippet is encoded OK 
			//UPGRADE_ISSUE: Class 'javax.xml.parsers.DocumentBuilderFactory' was not converted. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1000_javaxxmlparsersDocumentBuilderFactory_3"'
			//UPGRADE_ISSUE: Method 'javax.xml.parsers.DocumentBuilderFactory.newInstance' was not converted. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1000_javaxxmlparsersDocumentBuilderFactory_3"'
			DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
			//UPGRADE_TODO: Class 'javax.xml.parsers.DocumentBuilder' was converted to 'System.Xml.XmlDocument' which has a different behavior. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1073_javaxxmlparsersDocumentBuilder_3"'
			System.Xml.XmlDocument db = new System.Xml.XmlDocument();
			System.Xml.XmlDocument tempDocument;
			//UPGRADE_TODO: Method 'javax.xml.parsers.DocumentBuilder.parse' was converted to 'System.Xml.XmlDocument.Load' which has a different behavior. 'ms-help://MS.VSCC.2003/commoner/redir/redirect.htm?keyword="jlca1073_javaxxmlparsersDocumentBuilderparse_javaioInputStream_3"'
			tempDocument = (System.Xml.XmlDocument) db.Clone();
			tempDocument.Load(new System.IO.MemoryStream(SupportClass.ToByteArray(xhtml)));
			System.Xml.XmlDocument doc = tempDocument;
			System.Xml.XmlElement root = (System.Xml.XmlElement) doc.DocumentElement;
			System.Xml.XmlNodeList nodes = root.GetElementsByTagName("body");
			System.Xml.XmlElement body = (System.Xml.XmlElement) nodes.Item(0);
			nodes = body.GetElementsByTagName("h2");
			System.Xml.XmlElement h2 = (System.Xml.XmlElement) nodes.Item(0);
			System.String decodedSnippet = h2.FirstChild.Value;
			Assert.AreEqual(rawDocContent, decodedSnippet, "XHTML Encoding should have worked:");
            */
		}
+		
+        [Test]
+		public virtual void  TestMultiSearcher()
+		{
+			//setup index 1
+			RAMDirectory ramDir1 = new RAMDirectory();
+			IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
+			Document d = new Document();
+			Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
+			d.Add(f);
+			writer1.AddDocument(d);
+			writer1.Optimize();
+			writer1.Close();
+			IndexReader reader1 = IndexReader.Open(ramDir1);
+			
+			//setup index 2
+			RAMDirectory ramDir2 = new RAMDirectory();
+			IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
+			d = new Document();
+			f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
+			d.Add(f);
+			writer2.AddDocument(d);
+			writer2.Optimize();
+			writer2.Close();
+			IndexReader reader2 = IndexReader.Open(ramDir2);
+			
+			
+			
+			IndexSearcher[] searchers = new IndexSearcher[2];
+			searchers[0] = new IndexSearcher(ramDir1);
+			searchers[1] = new IndexSearcher(ramDir2);
+			MultiSearcher multiSearcher = new MultiSearcher(searchers);
+			QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+			query = parser.Parse("multi*");
+			System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
+			//at this point the multisearcher calls combine(query[])
+			hits = multiSearcher.Search(query);
+			
+			//query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
+			Query[] expandedQueries = new Query[2];
+			expandedQueries[0] = query.Rewrite(reader1);
+			expandedQueries[1] = query.Rewrite(reader2);
+			query = query.Combine(expandedQueries);
+			
+			
+			//create an instance of the highlighter with the tags used to surround highlighted text
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				System.String highlightedText = highlighter.GetBestFragment(tokenStream, text);
+				System.Console.Out.WriteLine(highlightedText);
+			}
+			Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
+		}
+		
+        [Test]
+		public virtual void  TestFieldSpecificHighlighting()
+		{
+			System.String docMainText = "fred is one of the people";
+			QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+			Query query = parser.Parse("fred category:people");
+			
+			//highlighting respects fieldnames used in query
+			QueryScorer fieldSpecificScorer = new QueryScorer(query, "contents");
+			Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldSpecificScorer);
+			fieldSpecificHighlighter.SetTextFragmenter(new NullFragmenter());
+			System.String result = fieldSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
+			Assert.AreEqual(result, "<B>fred</B> is one of the people", "Should match");
+			
+			//highlighting does not respect fieldnames used in query
+			QueryScorer fieldInSpecificScorer = new QueryScorer(query);
+			Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), fieldInSpecificScorer);
+			fieldInSpecificHighlighter.SetTextFragmenter(new NullFragmenter());
+			result = fieldInSpecificHighlighter.GetBestFragment(analyzer, FIELD_NAME, docMainText);
+			Assert.AreEqual(result, "<B>fred</B> is one of the <B>people</B>", "Should match");
+			
+			
+			reader.Close();
+		}
+		
+		
+		
+		/*
+		
+		public void testBigramAnalyzer() throws IOException, ParseException
+		{
+		//test to ensure analyzers with none-consecutive start/end offsets
+		//dont double-highlight text
+		//setup index 1
+		RAMDirectory ramDir = new RAMDirectory();
+		Analyzer bigramAnalyzer=new CJKAnalyzer();
+		IndexWriter writer = new IndexWriter(ramDir,bigramAnalyzer , true);
+		Document d = new Document();
+		Field f = new Field(FIELD_NAME, "java abc def", true, true, true);
+		d.add(f);
+		writer.addDocument(d);
+		writer.close();
+		IndexReader reader = IndexReader.open(ramDir);
+		
+		IndexSearcher searcher=new IndexSearcher(reader);
+		query = QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer);
+		System.out.println("Searching for: " + query.toString(FIELD_NAME));
+		hits = searcher.search(query);
+		
+		Highlighter highlighter =
+		new Highlighter(this,new QueryFragmentScorer(query));
+		
+		for (int i = 0; i < hits.length(); i++)
+		{
+		String text = hits.doc(i).get(FIELD_NAME);
+		TokenStream tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
+		String highlightedText = highlighter.getBestFragment(tokenStream,text);
+		System.out.println(highlightedText);
+		}
+		
+		}*/
+		
+		
+		public virtual System.String HighlightTerm(System.String originalText, TokenGroup group)
+		{
+			if (group.GetTotalScore() <= 0)
+			{
+				return originalText;
+			}
+			numHighlights++; //update stats used in assertions
+			return "<b>" + originalText + "</b>";
+		}
+		
+		public virtual void  DoSearching(System.String queryString)
+		{
+			QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+			query = parser.Parse(queryString);
+			DoSearching(query);
+		}
+		public virtual void  DoSearching(Query unReWrittenQuery)
+		{
+			searcher = new IndexSearcher(ramDir);
+			//for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query!
+			query = unReWrittenQuery.Rewrite(reader);
+			System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
+			hits = searcher.Search(query);
+		}
+		
+		internal virtual void  DoStandardHighlights()
+		{
+			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
+			highlighter.SetTextFragmenter(new SimpleFragmenter(20));
+			for (int i = 0; i < hits.Length(); i++)
+			{
+				System.String text = hits.Doc(i).Get(FIELD_NAME);
+				int maxNumFragmentsRequired = 2;
+				System.String fragmentSeparator = "...";
+				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
+				
+				System.String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
+				System.Console.Out.WriteLine("\t" + result);
+			}
+		}
+		
+		/*
+		* @see TestCase#setUp()
+		*/
+		[TestFixtureSetUp]
+        protected virtual void  SetUp()
+		{
+			ramDir = new RAMDirectory();
+			IndexWriter writer = new IndexWriter(ramDir, new StandardAnalyzer(), true);
+			for (int i = 0; i < texts.Length; i++)
+			{
+				AddDoc(writer, texts[i]);
+			}
+			
+			writer.Optimize();
+			writer.Close();
+			reader = IndexReader.Open(ramDir);
+			numHighlights = 0;
+		}
+		
+		private void  AddDoc(IndexWriter writer, System.String text)
+		{
+			Document d = new Document();
+			Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.TOKENIZED);
+			d.Add(f);
+			writer.AddDocument(d);
+		}
+		
+		/*
+		* @see TestCase#tearDown()
+		*/
+        [TestFixtureTearDown]
+		protected virtual void  TearDown()
+		{
+			//base.TearDown();
+		}
+	}
+	
+	
+	//===================================================================
+	//========== BEGIN TEST SUPPORTING CLASSES
+	//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
+	//========== MADE MORE GENERALLY USEFUL.
+	// TODO - make synonyms all interchangeable with each other and produce
+	// a version that does hyponyms - the "is a specialised type of ...."
+	// so that car = audi, bmw and volkswagen but bmw != audi so different
+	// behaviour to synonyms
+	//===================================================================
+	
+	class SynonymAnalyzer : Analyzer
+	{
+		private System.Collections.IDictionary synonyms;
+		
+		public SynonymAnalyzer(System.Collections.IDictionary synonyms)
+		{
+			this.synonyms = synonyms;
+		}
+		
+		/* (non-Javadoc)
+		* @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+		*/
+		public override TokenStream TokenStream(System.String arg0, System.IO.TextReader arg1)
+		{
+			return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
+		}
+	}
+	
+	/// <summary> Expands a token stream with synonyms (TODO - make the synonyms analyzed by choice of analyzer)</summary>
+	/// <author>  MAHarwood
+	/// </author>
+	class SynonymTokenizer : TokenStream
+	{
+		private TokenStream realStream;
+		private Token currentRealToken = null;
+		private System.Collections.IDictionary synonyms;
+		internal Tokenizer st = null;
+		public SynonymTokenizer(TokenStream realStream, System.Collections.IDictionary synonyms)
+		{
+			this.realStream = realStream;
+			this.synonyms = synonyms;
+		}
+		public override Token Next()
+		{
+			if (currentRealToken == null)
+			{
+				Token nextRealToken = realStream.Next();
+				if (nextRealToken == null)
+				{
+					return null;
+				}
+				System.String expansions = (System.String) synonyms[nextRealToken.TermText()];
+				if (expansions == null)
+				{
+					return nextRealToken;
+				}
+				st = new Tokenizer(expansions, ",");
+				if (st.HasMoreTokens())
+				{
+					currentRealToken = nextRealToken;
+				}
+				return currentRealToken;
+			}
+			else
+			{
+				System.String nextExpandedValue = st.NextToken();
+				Token expandedToken = new Token(nextExpandedValue, currentRealToken.StartOffset(), currentRealToken.EndOffset());
+				expandedToken.SetPositionIncrement(0);
+				if (!st.HasMoreTokens())
+				{
+					currentRealToken = null;
+					st = null;
+				}
+				return expandedToken;
+			}
+		}
+	}
+
+    /// <summary>
+    /// The class performs token processing in strings
+    /// </summary>
+    public class Tokenizer : System.Collections.IEnumerator
+    {
+        /// Position over the string
+        private long currentPos = 0;
+
+        /// Include demiliters in the results.
+        private bool includeDelims = false;
+
+        /// Char representation of the String to tokenize.
+        private char[] chars = null;
+			
+        //The tokenizer uses the default delimiter set: the space character, the tab character, the newline character, and the carriage-return character and the form-feed character
+        private string delimiters = " \t\n\r\f";		
+
+        /// <summary>
+        /// Initializes a new class instance with a specified string to process
+        /// </summary>
+        /// <param name="source">String to tokenize</param>
+        public Tokenizer(System.String source)
+        {			
+            this.chars = source.ToCharArray();
+        }
+
+        /// <summary>
+        /// Initializes a new class instance with a specified string to process
+        /// and the specified token delimiters to use
+        /// </summary>
+        /// <param name="source">String to tokenize</param>
+        /// <param name="delimiters">String containing the delimiters</param>
+        public Tokenizer(System.String source, System.String delimiters):this(source)
+        {			
+            this.delimiters = delimiters;
+        }
+
+
+        /// <summary>
+        /// Initializes a new class instance with a specified string to process, the specified token 
+        /// delimiters to use, and whether the delimiters must be included in the results.
+        /// </summary>
+        /// <param name="source">String to tokenize</param>
+        /// <param name="delimiters">String containing the delimiters</param>
+        /// <param name="includeDelims">Determines if delimiters are included in the results.</param>
+        public Tokenizer(System.String source, System.String delimiters, bool includeDelims):this(source,delimiters)
+        {
+            this.includeDelims = includeDelims;
+        }	
+
+
+        /// <summary>
+        /// Returns the next token from the token list
+        /// </summary>
+        /// <returns>The string value of the token</returns>
+        public System.String NextToken()
+        {				
+            return NextToken(this.delimiters);
+        }
+
+        /// <summary>
+        /// Returns the next token from the source string, using the provided
+        /// token delimiters
+        /// </summary>
+        /// <param name="delimiters">String containing the delimiters to use</param>
+        /// <returns>The string value of the token</returns>
+        public System.String NextToken(System.String delimiters)
+        {
+            //According to documentation, the usage of the received delimiters should be temporary (only for this call).
+            //However, it seems it is not true, so the following line is necessary.
+            this.delimiters = delimiters;
+
+            //at the end 
+            if (this.currentPos == this.chars.Length)
+                throw new System.ArgumentOutOfRangeException();
+                //if over a delimiter and delimiters must be returned
+            else if (   (System.Array.IndexOf(delimiters.ToCharArray(),chars[this.currentPos]) != -1)
+                && this.includeDelims )                	
+                return "" + this.chars[this.currentPos++];
+                //need to get the token wo delimiters.
+            else
+                return nextToken(delimiters.ToCharArray());
+        }
+
+        //Returns the nextToken wo delimiters
+        private System.String nextToken(char[] delimiters)
+        {
+            string token="";
+            long pos = this.currentPos;
+
+            //skip possible delimiters
+            while (System.Array.IndexOf(delimiters,this.chars[currentPos]) != -1)
+                //The last one is a delimiter (i.e there is no more tokens)
+                if (++this.currentPos == this.chars.Length)
+                {
+                    this.currentPos = pos;
+                    throw new System.ArgumentOutOfRangeException();
+                }
+			
+            //getting the token
+            while (System.Array.IndexOf(delimiters,this.chars[this.currentPos]) == -1)
+            {
+                token+=this.chars[this.currentPos];
+                //the last one is not a delimiter
+                if (++this.currentPos == this.chars.Length)
+                    break;
+            }
+            return token;
+        }
+
+				
+        /// <summary>
+        /// Determines if there are more tokens to return from the source string
+        /// </summary>
+        /// <returns>True or false, depending if there are more tokens</returns>
+        public bool HasMoreTokens()
+        {
+            //keeping the current pos
+            long pos = this.currentPos;
+			
+            try
+            {
+                this.NextToken();
+            }
+            catch (System.ArgumentOutOfRangeException)
+            {				
+                return false;
+            }
+            finally
+            {
+                this.currentPos = pos;
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Remaining tokens count
+        /// </summary>
+        public int Count
+        {
+            get
+            {
+                //keeping the current pos
+                long pos = this.currentPos;
+                int i = 0;
+			
+                try
+                {
+                    while (true)
+                    {
+                        this.NextToken();
+                        i++;
+                    }
+                }
+                catch (System.ArgumentOutOfRangeException)
+                {				
+                    this.currentPos = pos;
+                    return i;
+                }
+            }
+        }
+
+        /// <summary>
+        ///  Performs the same action as NextToken.
+        /// </summary>
+        public System.Object Current
+        {
+            get
+            {
+                return (Object) this.NextToken();
+            }		
+        }		
+		
+        /// <summary>
+        //  Performs the same action as HasMoreTokens.
+        /// </summary>
+        /// <returns>True or false, depending if there are more tokens</returns>
+        public bool MoveNext()
+        {
+            return this.HasMoreTokens();
+        }
+		
+        /// <summary>
+        /// Does nothing.
+        /// </summary>
+        public void  Reset()
+        {
+            ;
+        }			
+    }
+}
\ No newline at end of file