You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/23 07:55:22 UTC

[Lucene.Net] svn commit: r1205303 - in /incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries: ./ Similar/

Author: ccurrens
Date: Wed Nov 23 06:55:21 2011
New Revision: 1205303

URL: http://svn.apache.org/viewvc?rev=1205303&view=rev
Log:
ported changes to Contrib.Queries

Added:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FileDiffs.txt
Removed:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Support.cs
Modified:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/BooleanFilter.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Contrib.Queries.csproj
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/DuplicateFilter.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FilterClause.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FuzzyLikeThisQuery.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThis.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThisQuery.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/SimilarityQueries.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/TermsFilter.cs

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/BooleanFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/BooleanFilter.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/BooleanFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/BooleanFilter.cs Wed Nov 23 06:55:21 2011
@@ -21,6 +21,7 @@ using System.Linq;
 using System.Text;
 
 using Lucene.Net.Index;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 
 namespace Lucene.Net.Search
@@ -51,7 +52,7 @@ namespace Lucene.Net.Search
         /// <returns></returns>
         private DocIdSetIterator GetDISI(List<Filter> filters, int index, IndexReader reader)
         {
-            return ((Filter)filters[index]).GetDocIdSet(reader).Iterator();
+            return filters[index].GetDocIdSet(reader).Iterator();
         }
 
         /// <summary>
@@ -73,7 +74,7 @@ namespace Lucene.Net.Search
                     }
                     else
                     {
-                        DocIdSet dis = ((Filter)shouldFilters[i]).GetDocIdSet(reader);
+                        DocIdSet dis = shouldFilters[i].GetDocIdSet(reader);
                         if (dis is OpenBitSet)
                         {
                             // optimized case for OpenBitSets
@@ -98,7 +99,7 @@ namespace Lucene.Net.Search
                     }
                     else
                     {
-                        DocIdSet dis = ((Filter)notFilters[i]).GetDocIdSet(reader);
+                        DocIdSet dis = notFilters[i].GetDocIdSet(reader);
                         if (dis is OpenBitSet)
                         {
                             // optimized case for OpenBitSets
@@ -122,7 +123,7 @@ namespace Lucene.Net.Search
                     }
                     else
                     {
-                        DocIdSet dis = ((Filter)mustFilters[i]).GetDocIdSet(reader);
+                        DocIdSet dis = mustFilters[i].GetDocIdSet(reader);
                         if (dis is OpenBitSet)
                         {
                             // optimized case for OpenBitSets
@@ -139,58 +140,52 @@ namespace Lucene.Net.Search
             if (res != null)
                 return FinalResult(res, reader.MaxDoc());
 
-            else
-            {
-                //TODO: 2.- change return DocIdSet.EMPTY_DOCIDSET;
-                return null;
-            }
+            return DocIdSet.EMPTY_DOCIDSET;
+        }
+
+        /** Provide a SortedVIntList when it is definitely smaller
+         * than an OpenBitSet.
+         * @deprecated Either use CachingWrapperFilter, or
+         * switch to a different DocIdSet implementation yourself. 
+         * This method will be removed in Lucene 4.0
+         */
+        protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
+        {
+            return result;
         }
 
         /// <summary>
         /// Add a filter clause.
         /// </summary>
         /// <param name="filterClause">The clause to add.</param>
-        public void Add(BooleanFilterClause filterClause)
+        public void Add(FilterClause filterClause)
         {
-            if (filterClause.Occur == BooleanClause.Occur.MUST)
+            if (filterClause.GetOccur() == BooleanClause.Occur.MUST)
             {
                 if (mustFilters == null)
                 {
-                    mustFilters = new List<Filter>();
+                    mustFilters = new EquatableList<Filter>();
                 }
-                mustFilters.Add(filterClause.Filter);
+                mustFilters.Add(filterClause.GetFilter());
             }
-            if (filterClause.Occur == BooleanClause.Occur.SHOULD)
+            if (filterClause.GetOccur() == BooleanClause.Occur.SHOULD)
             {
                 if (shouldFilters == null)
                 {
-                    shouldFilters = new List<Filter>();
+                    shouldFilters = new EquatableList<Filter>();
                 }
-                shouldFilters.Add(filterClause.Filter);
+                shouldFilters.Add(filterClause.GetFilter());
             }
-            if (filterClause.Occur == BooleanClause.Occur.MUST_NOT)
+            if (filterClause.GetOccur() == BooleanClause.Occur.MUST_NOT)
             {
                 if (notFilters == null)
                 {
-                    notFilters = new List<Filter>();
+                    notFilters = new EquatableList<Filter>();
                 }
-                notFilters.Add(filterClause.Filter);
+                notFilters.Add(filterClause.GetFilter());
             }
         }
 
-        // TODO: in 3.0, instead of removing this deprecated
-        // method, make it a no-op and mark it final
-        /** Provide a SortedVIntList when it is definitely smaller
-         * than an OpenBitSet.
-         * @deprecated Either use CachingWrapperFilter, or
-         * switch to a different DocIdSet implementation yourself. */
-        protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
-        {
-            return (result.Cardinality() < (maxDocs / 9))
-              ? (DocIdSet)new SortedVIntList(result)
-              : (DocIdSet)result;
-        }
-
         /// <summary>
         /// Determine equality between two lists.
         /// </summary>
@@ -283,38 +278,4 @@ namespace Lucene.Net.Search
             }
         }
     }
-
-    /// <summary>
-    /// A spefic clause that makes up a part of the BooleanFilter
-    /// </summary>
-    public class BooleanFilterClause
-    {
-        /// <summary>
-        /// Create a new BooleanFilterClause
-        /// </summary>
-        /// <param name="filter">A Filter object</param>
-        /// <param name="occur">A parameter implementation indicating SHOULD, MUST or MUST NOT</param>
-        public BooleanFilterClause(Filter filter, BooleanClause.Occur occur)
-        {
-            this.Occur = occur;
-            this.Filter = filter;
-        }
-
-        /// <summary>
-        /// The underlying filter for the clause.
-        /// </summary>
-        public Filter Filter
-        {
-            get;
-            private set;
-        }
-        /// <summary>
-        /// The occurrence of this clause.
-        /// </summary>
-        public BooleanClause.Occur Occur
-        {
-            get;
-            private set;
-        }
-    }
 }

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Contrib.Queries.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Contrib.Queries.csproj?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Contrib.Queries.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Contrib.Queries.csproj Wed Nov 23 06:55:21 2011
@@ -19,7 +19,6 @@
  under the License.
 
 -->
-
 <Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <PropertyGroup>
     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -92,7 +91,6 @@
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Similar\MoreLikeThisQuery.cs" />
     <Compile Include="Similar\SimilarityQueries.cs" />
-    <Compile Include="Support.cs" />
     <Compile Include="TermsFilter.cs" />
   </ItemGroup>
   <ItemGroup>
@@ -126,6 +124,9 @@
   <ItemGroup>
     <None Include="Lucene.Net.snk" />
   </ItemGroup>
+  <ItemGroup>
+    <Content Include="FileDiffs.txt" />
+  </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
        Other similar extension points exist, see Microsoft.Common.targets.

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/DuplicateFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/DuplicateFilter.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/DuplicateFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/DuplicateFilter.cs Wed Nov 23 06:55:21 2011
@@ -28,7 +28,6 @@ namespace Lucene.Net.Search
 {
     public class DuplicateFilter : Filter
     {
-
         String fieldName;
 
         /**
@@ -82,7 +81,6 @@ namespace Lucene.Net.Search
 
         private OpenBitSet CorrectBits(IndexReader reader)
         {
-
             OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid
             Term startTerm = new Term(fieldName);
             TermEnum te = reader.Terms(startTerm);
@@ -121,7 +119,6 @@ namespace Lucene.Net.Search
 
         private OpenBitSet FastBits(IndexReader reader)
         {
-
             OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
             bits.Set(0, reader.MaxDoc()); //assume all are valid
             Term startTerm = new Term(fieldName);
@@ -163,28 +160,6 @@ namespace Lucene.Net.Search
             return bits;
         }
 
-        //    /**
-        //     * <param name="args"></param>
-        //     * @throws IOException 
-        //     * @throws Exception 
-        //     */
-        //    public static void main(String[] args) 
-        //    {
-        //        IndexReader r=IndexReader.open("/indexes/personCentricAnon");
-        ////		IndexReader r=IndexReader.open("/indexes/enron");
-        //        long start=System.currentTimeMillis();
-        ////		DuplicateFilter df = new DuplicateFilter("threadId",KM_USE_FIRST_OCCURRENCE, PM_FAST_INVALIDATION);
-        ////		DuplicateFilter df = new DuplicateFilter("threadId",KM_USE_LAST_OCCURRENCE, PM_FAST_INVALIDATION);
-        //        DuplicateFilter df = new DuplicateFilter("vehicle.vrm",KM_USE_LAST_OCCURRENCE, PM_FAST_INVALIDATION);
-        ////		DuplicateFilter df = new DuplicateFilter("title",USE_LAST_OCCURRENCE);
-        ////		df.setProcessingMode(PM_SLOW_VALIDATION);
-        //        BitSet b = df.bits(r);
-        //        long end=System.currentTimeMillis()-start;
-        //        System.out.println(b.cardinality()+" in "+end+" ms ");
-
-        //    }
-
-
         public String GetFieldName()
         {
             return fieldName;

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FileDiffs.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FileDiffs.txt?rev=1205303&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FileDiffs.txt (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FileDiffs.txt Wed Nov 23 06:55:21 2011
@@ -0,0 +1,11 @@
+similar\MoreLikeThis.java - PORTED
+similar\MoreLikeThisQuery.java - PORTED
+similar\SimilarityQueries.java - PORTED
+BooleanFilter.java - PORTED
+BoostingQuery.java - PORTED
+DuplicateFilter.java - PORTED
+FilterClause.java - PORTED
+FuzzyLikeThisQuery.java - PORTED
+TermsFilter.java - PORTED
+
+All Files and All tests ported
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FilterClause.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FilterClause.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FilterClause.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FilterClause.cs Wed Nov 23 06:55:21 2011
@@ -22,11 +22,45 @@ using System.Text;
 
 namespace Lucene.Net.Search
 {
-    class FilterClause
+    /**
+     * A Filter that is wrapped with an indication of how that filter
+     * is used when composed with another filter.
+     * (Follows the boolean logic in BooleanClause for composition 
+     * of queries.)
+     */
+    [Serializable]
+    public class FilterClause
     {
-        public FilterClause()
+        BooleanClause.Occur occur;
+        Filter filter;
+
+        /**
+         * Create a new FilterClause
+         * @param filter A Filter object containing a BitSet
+         * @param occur A parameter implementation indicating SHOULD, MUST or MUST NOT
+         */
+        public FilterClause(Filter filter, BooleanClause.Occur occur)
+        {
+            this.occur = occur;
+            this.filter = filter;
+        }
+
+        /**
+         * Returns this FilterClause's filter
+         * @return A Filter object
+         */
+        public Filter GetFilter()
+        {
+            return filter;
+        }
+
+        /**
+         * Returns this FilterClause's occur parameter
+         * @return An Occur object
+         */
+        public BooleanClause.Occur GetOccur()
         {
-            throw new NotImplementedException("Not implemented yet.");
+            return occur;
         }
     }
-}
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FuzzyLikeThisQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FuzzyLikeThisQuery.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FuzzyLikeThisQuery.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/FuzzyLikeThisQuery.cs Wed Nov 23 06:55:21 2011
@@ -25,6 +25,7 @@ using Lucene.Net.Search;
 using Lucene.Net.Index;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 
 namespace Lucene.Net.Search
@@ -48,7 +49,7 @@ namespace Lucene.Net.Search
     {
         static Similarity sim = new DefaultSimilarity();
         Query rewrittenQuery = null;
-        ArrayList fieldVals = new ArrayList();
+        EquatableList<FieldVals> fieldVals = new EquatableList<FieldVals>();
         Analyzer analyzer;
 
         ScoreTermQueue q;
@@ -89,7 +90,7 @@ namespace Lucene.Net.Search
                 if (other.fieldVals != null)
                     return false;
             }
-            else if (!fieldVals.EqualsToArrayList(other.fieldVals))
+            else if (!fieldVals.Equals(other.fieldVals))
                 return false;
             if (ignoreTF != other.ignoreTF)
                 return false;
@@ -190,17 +191,17 @@ namespace Lucene.Net.Search
         {
             if (f.queryString == null) return;
             TokenStream ts = analyzer.TokenStream(f.fieldName, new System.IO.StringReader(f.queryString));
-            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
+            TermAttribute termAtt = ts.AddAttribute<TermAttribute>();
 
             int corpusNumDocs = reader.NumDocs();
             Term internSavingTemplateTerm = new Term(f.fieldName); //optimization to avoid constructing new Term() objects
-            Hashtable processedTerms = new Hashtable();
+            HashSet<string> processedTerms = new HashSet<string>();
             while (ts.IncrementToken())
             {
                 String term = termAtt.Term();
                 if (!processedTerms.Contains(term))
                 {
-                    processedTerms.Add(term,term);
+                    processedTerms.Add(term);
                     ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                     float minScore = 0;
                     Term startTerm = internSavingTemplateTerm.CreateTerm(term);
@@ -224,8 +225,8 @@ namespace Lucene.Net.Search
                             if (variantsQ.Size() < MAX_VARIANTS_PER_TERM || score > minScore)
                             {
                                 ScoreTerm st = new ScoreTerm(possibleMatch, score, startTerm);
-                                variantsQ.Insert(st);
-                                minScore = ((ScoreTerm)variantsQ.Top()).score; // maintain minScore
+                                variantsQ.InsertWithOverflow(st);
+                                minScore = variantsQ.Top().score; // maintain minScore
                             }
                         }
                     }
@@ -244,9 +245,9 @@ namespace Lucene.Net.Search
                         int size = variantsQ.Size();
                         for (int i = 0; i < size; i++)
                         {
-                            ScoreTerm st = (ScoreTerm)variantsQ.Pop();
+                            ScoreTerm st = variantsQ.Pop();
                             st.score = (st.score * st.score) * sim.Idf(df, corpusNumDocs);
-                            q.Insert(st);
+                            q.InsertWithOverflow(st);
                         }
                     }
                 }
@@ -264,11 +265,6 @@ namespace Lucene.Net.Search
             {
                 AddTerms(reader, f);
             }
-            //for (Iterator iter = fieldVals.iterator(); iter.hasNext(); )
-            //{
-            //    FieldVals f = (FieldVals)iter.next();
-            //    addTerms(reader, f);
-            //}
             //clear the list of fields
             fieldVals.Clear();
 
@@ -278,28 +274,26 @@ namespace Lucene.Net.Search
             //create BooleanQueries to hold the variants for each token/field pair and ensure it
             // has no coord factor
             //Step 1: sort the termqueries by term/field
-            Hashtable variantQueries = new Hashtable();
+            HashMap<Term, List<ScoreTerm>> variantQueries = new HashMap<Term, List<ScoreTerm>>();
             int size = q.Size();
             for (int i = 0; i < size; i++)
             {
-                ScoreTerm st = (ScoreTerm)q.Pop();
-                ArrayList l = (ArrayList)variantQueries[st.fuzziedSourceTerm];
+                ScoreTerm st = q.Pop();
+                var l = variantQueries[st.fuzziedSourceTerm];
                 if (l == null)
                 {
-                    l = new ArrayList();
+                    l = new List<ScoreTerm>();
                     variantQueries.Add(st.fuzziedSourceTerm, l);
                 }
                 l.Add(st);
             }
             //Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
-            foreach(ArrayList variants in variantQueries.Values)
-            //for (Iterator iter = variantQueries.values().iterator(); iter.hasNext(); )
+            foreach(var variants in variantQueries.Values)
             {
-                //ArrayList variants = (ArrayList)iter.next();
                 if (variants.Count == 1)
                 {
                     //optimize where only one selected variant
-                    ScoreTerm st = (ScoreTerm)variants[0];
+                    ScoreTerm st = variants[0];
                     TermQuery tq = new FuzzyTermQuery(st.term, ignoreTF);
                     tq.SetBoost(st.score); // set the boost to a mix of IDF and score
                     bq.Add(tq, BooleanClause.Occur.SHOULD);
@@ -308,9 +302,7 @@ namespace Lucene.Net.Search
                 {
                     BooleanQuery termVariants = new BooleanQuery(true); //disable coord and IDF for these term variants
                     foreach(ScoreTerm st in variants)
-                    //for (Iterator iterator2 = variants.iterator(); iterator2.hasNext(); )
                     {
-                        //ScoreTerm st = (ScoreTerm)iterator2.next();
                         TermQuery tq = new FuzzyTermQuery(st.term, ignoreTF);      // found a match
                         tq.SetBoost(st.score); // set the boost using the ScoreTerm's score
                         termVariants.Add(tq, BooleanClause.Occur.SHOULD);          // add to query                    
@@ -342,7 +334,7 @@ namespace Lucene.Net.Search
             }
         }
 
-        private class ScoreTermQueue : PriorityQueue
+        private class ScoreTermQueue : PriorityQueue<ScoreTerm>
         {
             public ScoreTermQueue(int size)
             {
@@ -352,10 +344,8 @@ namespace Lucene.Net.Search
             /* (non-Javadoc)
              * <see cref="org.apache.lucene.util.PriorityQueue.lessThan(java.lang.Object, java.lang.Object)"/>
              */
-            public override bool LessThan(Object a, Object b)
+            public override bool LessThan(ScoreTerm termA, ScoreTerm termB)
             {
-                ScoreTerm termA = (ScoreTerm)a;
-                ScoreTerm termB = (ScoreTerm)b;
                 if (termA.score == termB.score)
                     return termA.term.CompareTo(termB.term) > 0;
                 else
@@ -403,28 +393,7 @@ namespace Lucene.Net.Search
                     //IDF is already factored into individual term boosts
                     return 1;
                 }
-
-                public override float Coord(int overlap, int maxOverlap)
-                {
-                    return base.Coord(overlap, maxOverlap);
-                }
-
-                public override float LengthNorm(string fieldName, int numTokens)
-                {
-                    return base.LengthNorm(fieldName, numTokens);
-                }
-
-                public override float QueryNorm(float sumOfSquaredWeights)
-                {
-                    return base.QueryNorm(sumOfSquaredWeights);
-                }
-
-                public override float SloppyFreq(int distance)
-                {
-                    return base.SloppyFreq(distance);
-                }
             }
-
         }
 
 

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThis.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThis.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThis.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThis.cs Wed Nov 23 06:55:21 2011
@@ -16,8 +16,12 @@
  */
 
 using System;
-
-using PriorityQueue = Lucene.Net.Util.PriorityQueue;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
 using IndexReader = Lucene.Net.Index.IndexReader;
 using Term = Lucene.Net.Index.Term;
 using TermFreqVector = Lucene.Net.Index.TermFreqVector;
@@ -27,7 +31,6 @@ using TermQuery = Lucene.Net.Search.Term
 using BooleanQuery = Lucene.Net.Search.BooleanQuery;
 using IndexSearcher = Lucene.Net.Search.IndexSearcher;
 using Query = Lucene.Net.Search.Query;
-using Hits = Lucene.Net.Search.Hits;
 using Analyzer = Lucene.Net.Analysis.Analyzer;
 using TokenStream = Lucene.Net.Analysis.TokenStream;
 using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
@@ -36,8 +39,6 @@ using Lucene.Net.Analysis.Tokenattribute
 
 namespace Lucene.Net.Search.Similar
 {
-
-
     /// <summary> Generate "more like this" similarity queries. 
     /// Based on this mail:
     /// <pre>
@@ -114,6 +115,8 @@ namespace Lucene.Net.Search.Similar
     /// <ul>
     /// <li> <see cref="SetMinTermFreq"/> </li>
     /// <li> <see cref="SetMinDocFreq"/> </li>
+    /// <li> <see cref="SetMaxDocFreq"/></li>
+    /// <li> <see cref="SetMaxDocFreqPct"/></li>
     /// <li> <see cref="SetMinWordLen"/> </li>
     /// <li> <see cref="SetMaxWordLen"/></li>
     /// <li> <see cref="SetMaxQueryTerms"/></li>
@@ -144,7 +147,7 @@ namespace Lucene.Net.Search.Similar
         /// <summary> Default analyzer to parse source doc with.</summary>
         /// <seealso cref="GetAnalyzer">
         /// </seealso>
-        public static readonly Analyzer DEFAULT_ANALYZER = new StandardAnalyzer();
+        public static readonly Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
 
         /// <summary> Ignore terms with less than this frequency in the source doc.</summary>
         /// <seealso cref="GetMinTermFreq">
@@ -158,7 +161,15 @@ namespace Lucene.Net.Search.Similar
         /// </seealso>
         /// <seealso cref="SetMinDocFreq">
         /// </seealso>
-        public const int DEFALT_MIN_DOC_FREQ = 5;
+        public const int DEFAULT_MIN_DOC_FREQ = 5;
+
+        /// <summary>
+        /// Ignore words which occur in more than this many docs
+        /// </summary>
+        /// <seealso cref="GetMaxDocFreq"/>
+        /// <seealso cref="SetMaxDocFreq"/>
+        /// <seealso cref="SetMaxDocFreqPct"/>
+        public const int DEFAULT_MAX_DOC_FREQ = int.MaxValue;
 
         /// <summary> Boost terms in query based on score.</summary>
         /// <seealso cref="IsBoost">
@@ -194,10 +205,10 @@ namespace Lucene.Net.Search.Similar
         /// </seealso>
         /// <seealso cref="GetStopWords">
         /// </seealso>
-        public static readonly System.Collections.Hashtable DEFAULT_STOP_WORDS = null;
+        public static readonly ISet<string> DEFAULT_STOP_WORDS = null;
 
         /// <summary> Current set of stop words.</summary>
-        private System.Collections.Hashtable stopWords = DEFAULT_STOP_WORDS;
+        private ISet<string> stopWords = DEFAULT_STOP_WORDS;
 
         /// <summary> Return a Query with no more than this many terms.
         /// 
@@ -217,7 +228,12 @@ namespace Lucene.Net.Search.Similar
         private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
 
         /// <summary> Ignore words which do not occur in at least this many docs.</summary>
-        private int minDocFreq = DEFALT_MIN_DOC_FREQ;
+        private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
+
+        /// <summary>
+        /// Ignore words which occur in more than this many docs.
+        /// </summary>
+        private int maxDocfreq = DEFAULT_MAX_DOC_FREQ;
 
         /// <summary> Should we apply a boost to the Query based on the scores?</summary>
         private bool boost = DEFAULT_BOOST;
@@ -228,8 +244,6 @@ namespace Lucene.Net.Search.Similar
         /// <summary> The maximum number of tokens to parse in each example doc field that is not stored with TermVector support</summary>
         private int maxNumTokensParsed = DEFAULT_MAX_NUM_TOKENS_PARSED;
 
-
-
         /// <summary> Ignore words if less than this len.</summary>
         private int minWordLen = DEFAULT_MIN_WORD_LENGTH;
 
@@ -266,7 +280,7 @@ namespace Lucene.Net.Search.Similar
         }
 
         /// <summary> Constructor requiring an IndexReader.</summary>
-        public MoreLikeThis(IndexReader ir) : this(ir,new DefaultSimilarity() )
+        public MoreLikeThis(IndexReader ir) : this(ir,new DefaultSimilarity())
         {
         }
 
@@ -332,7 +346,7 @@ namespace Lucene.Net.Search.Similar
         }
 
         /// <summary> Returns the frequency at which words will be ignored which do not occur in at least this
-        /// many docs. The default frequency is <see cref="DEFALT_MIN_DOC_FREQ"/>.
+        /// many docs. The default frequency is <see cref="DEFAULT_MIN_DOC_FREQ"/>.
         /// 
         /// </summary>
         /// <returns> the frequency at which words will be ignored which do not occur in at least this
@@ -355,6 +369,43 @@ namespace Lucene.Net.Search.Similar
             this.minDocFreq = minDocFreq;
         }
 
+        /// <summary>
+        /// Returns the maximum frequency in which words may still appear. 
+        /// Words that appear in more than this many docs will be ignored. The default frequency is 
+        /// <see cref="DEFAULT_MAX_DOC_FREQ"/>
+        /// </summary>
+        /// <returns>get the maximum frequency at which words are still allowed,  
+        /// words which occur in more docs than this are ignored.</returns>
+        public int GetMaxDocFreq()
+        {
+            return this.maxDocfreq;
+        }
+
+        /// <summary>
+        /// Set the maximum frequency in which words may still appear. Words that appear
+        /// in more than this many docs will be ignored.
+        /// </summary>
+        /// <param name="maxFreq">
+        /// the maximum count of documents that a term may appear 
+        /// in to be still considered relevant</param>
+        public void SetMaxDocFreq(int maxFreq)
+        {
+            this.maxDocfreq = maxFreq;
+        }
+
+        /// <summary>
+        /// Set the maximum percentage in which words may still appear. Words that appear
+        /// in more than this many percent of all docs will be ignored.
+        /// </summary>
+        /// <param name="maxPercentage">
+        /// the maximum percentage of documents (0-100) that a term may appear 
+        /// in to be still considered relevant
+        /// </param>
+        public void SetMaxDocFreqPct(int maxPercentage)
+        {
+            this.maxDocfreq = maxPercentage * ir.NumDocs() / 100;
+        }
+
         /// <summary> Returns whether to boost terms in query based on "score" or not. The default is
         /// <see cref="DEFAULT_BOOST"/>.
         /// 
@@ -459,7 +510,7 @@ namespace Lucene.Net.Search.Similar
         /// </seealso>
         /// <seealso cref="GetStopWords">
         /// </seealso>
-        public void SetStopWords(System.Collections.Hashtable stopWords)
+        public void SetStopWords(ISet<string> stopWords)
         {
             this.stopWords = stopWords;
         }
@@ -467,7 +518,7 @@ namespace Lucene.Net.Search.Similar
         /// <summary> Get the current stop words being used.</summary>
         /// <seealso cref="SetStopWords">
         /// </seealso>
-        public System.Collections.Hashtable GetStopWords()
+        public ISet<string> GetStopWords()
         {
             return stopWords;
         }
@@ -511,24 +562,16 @@ namespace Lucene.Net.Search.Similar
             maxNumTokensParsed = i;
         }
 
-
-
-
-        /// <summary> Return a query that will return docs like the passed lucene document ID.
-        /// 
-        /// </summary>
-        /// <param name="docNum">the documentID of the lucene doc to generate the 'More Like This" query for.
-        /// </param>
-        /// <returns> a query that will return docs like the passed lucene document ID.
-        /// </returns>
+        /// <summary>Return a query that will return docs like the passed lucene document ID.</summary>
+        /// <param name="docNum">the documentID of the lucene doc to generate the "More Like This" query for.</param>
+        /// <returns> a query that will return docs like the passed lucene document ID.</returns>
         public Query Like(int docNum)
         {
             if (fieldNames == null)
             {
                 // gather list of valid fields from lucene
-                System.Collections.Generic.ICollection<string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
-                fieldNames = new string[fields.Count];
-                fields.CopyTo(fieldNames, 0);
+                ICollection<string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
+                fieldNames = fields.ToArray();
             }
 
             return CreateQuery(RetrieveTerms(docNum));
@@ -544,9 +587,8 @@ namespace Lucene.Net.Search.Similar
             if (fieldNames == null)
             {
                 // gather list of valid fields from lucene
-                System.Collections.Generic.ICollection<string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
-                fieldNames = new string[fields.Count];
-                fields.CopyTo(fieldNames, 0);
+                ICollection<string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
+                fieldNames = fields.ToArray();
             }
 
             return Like(new System.IO.StreamReader(f.FullName, System.Text.Encoding.Default));
@@ -559,7 +601,7 @@ namespace Lucene.Net.Search.Similar
         /// </returns>
         public Query Like(System.Uri u)
         {
-            return Like(new System.IO.StreamReader(((System.Net.HttpWebRequest)System.Net.WebRequest.Create(u)).GetResponse().GetResponseStream(), System.Text.Encoding.Default));
+            return Like(new System.IO.StreamReader((System.Net.WebRequest.Create(u)).GetResponse().GetResponseStream(), System.Text.Encoding.Default));
         }
 
         /// <summary> Return a query that will return docs like the passed stream.
@@ -583,7 +625,7 @@ namespace Lucene.Net.Search.Similar
         }
 
         /// <summary> Create the More like query from a PriorityQueue</summary>
-        private Query CreateQuery(PriorityQueue q)
+        private Query CreateQuery(PriorityQueue<object[]> q)
         {
             BooleanQuery query = new BooleanQuery();
             System.Object cur;
@@ -599,9 +641,9 @@ namespace Lucene.Net.Search.Similar
                 {
                     if (qterms == 0)
                     {
-                        bestScore = (float)((System.Single)ar[2]);
+                        bestScore = (float)ar[2];
                     }
-                    float myScore = (float)((System.Single)ar[2]);
+                    float myScore = (float)ar[2];
 
                     tq.SetBoost(boostFactor * myScore / bestScore);
                 }
@@ -630,19 +672,19 @@ namespace Lucene.Net.Search.Similar
         /// </summary>
         /// <param name="words">a map of words keyed on the word(String) with Int objects as the values.
         /// </param>
-        private PriorityQueue CreateQueue(System.Collections.IDictionary words)
+        private PriorityQueue<object[]> CreateQueue(IDictionary<string,Int> words)
         {
             // have collected all words in doc and their freqs
             int numDocs = ir.NumDocs();
             FreqQ res = new FreqQ(words.Count); // will order words by score
 
-            System.Collections.IEnumerator it = words.Keys.GetEnumerator();
+            var it = words.Keys.GetEnumerator();
             while (it.MoveNext())
             {
                 // for every word
-                System.String word = (System.String)it.Current;
+                System.String word = it.Current;
 
-                int tf = ((Int)words[word]).x; // term freq in the source doc
+                int tf = words[word].x; // term freq in the source doc
                 if (minTermFreq > 0 && tf < minTermFreq)
                 {
                     continue; // filter out words that don't occur enough times in the source
@@ -663,6 +705,11 @@ namespace Lucene.Net.Search.Similar
                     continue; // filter out words that don't occur in enough docs
                 }
 
+                if (docFreq > maxDocfreq)
+                {
+                    continue; // filter out words that occur in too many docs
+                }
+
                 if (docFreq == 0)
                 {
                     continue; // index update problem?
@@ -672,7 +719,7 @@ namespace Lucene.Net.Search.Similar
                 float score = tf * idf;
 
                 // only really need 1st 3 entries, other ones are for troubleshooting
-                res.Insert(new System.Object[] { word, topField, (float)score, (float)idf, (System.Int32)docFreq, (System.Int32)tf });
+                res.InsertWithOverflow(new System.Object[] { word, topField, score, idf, docFreq, tf });
             }
             return res;
         }
@@ -728,7 +775,8 @@ namespace Lucene.Net.Search.Similar
             temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
             temp_writer.AutoFlush = true;
             System.IO.StreamWriter o = temp_writer;
-            IndexReader r = IndexReader.Open(indexName);
+            FSDirectory dir = FSDirectory.Open(new DirectoryInfo(indexName));
+            IndexReader r = IndexReader.Open(dir, true);
             o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");
 
             MoreLikeThis mlt = new MoreLikeThis(r);
@@ -751,17 +799,18 @@ namespace Lucene.Net.Search.Similar
 
             o.WriteLine("q: " + query);
             o.WriteLine();
-            IndexSearcher searcher = new IndexSearcher(indexName);
+            IndexSearcher searcher = new IndexSearcher(dir, true);
 
-            Hits hits = searcher.Search(query);
-            int len = hits.Length();
+            TopDocs hits = searcher.Search(query, null, 25);
+            int len = hits.TotalHits;
             o.WriteLine("found: " + len + " documents matching");
             o.WriteLine();
+            ScoreDoc[] scoreDocs = hits.ScoreDocs;
             for (int i = 0; i < System.Math.Min(25, len); i++)
             {
-                Document d = hits.Doc(i);
+                Document d = searcher.Doc(scoreDocs[i].doc);
                 System.String summary = d.Get("summary");
-                o.WriteLine("score  : " + hits.Score(i));
+                o.WriteLine("score  : " + scoreDocs[i].score);
                 o.WriteLine("url    : " + d.Get("url"));
                 o.WriteLine("\ttitle  : " + d.Get("title"));
                 if (summary != null)
@@ -775,9 +824,9 @@ namespace Lucene.Net.Search.Similar
         /// </summary>
         /// <param name="docNum">the id of the lucene document from which to find terms
         /// </param>
-        private PriorityQueue RetrieveTerms(int docNum)
+        private PriorityQueue<object[]> RetrieveTerms(int docNum)
         {
-            System.Collections.IDictionary termFreqMap = new System.Collections.Hashtable();
+            IDictionary<string,Int> termFreqMap = new HashMap<string,Int>();
             for (int i = 0; i < fieldNames.Length; i++)
             {
                 System.String fieldName = fieldNames[i];
@@ -810,7 +859,7 @@ namespace Lucene.Net.Search.Similar
         /// </param>
         /// <param name="vector">List of terms and their frequencies for a doc/field
         /// </param>
-        private void AddTermFrequencies(System.Collections.IDictionary termFreqMap, TermFreqVector vector)
+        private void AddTermFrequencies(IDictionary<string, Int> termFreqMap, TermFreqVector vector)
         {
             System.String[] terms = vector.GetTerms();
             int[] freqs = vector.GetTermFrequencies();
@@ -823,7 +872,7 @@ namespace Lucene.Net.Search.Similar
                     continue;
                 }
                 // increment frequency
-                Int cnt = (Int)termFreqMap[term];
+                Int cnt = termFreqMap[term];
                 if (cnt == null)
                 {
                     cnt = new Int();
@@ -843,12 +892,12 @@ namespace Lucene.Net.Search.Similar
         /// </param>
         /// <param name="fieldName">Used by analyzer for any special per-field analysis
         /// </param>
-        private void AddTermFrequencies(System.IO.TextReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
+        private void AddTermFrequencies(System.IO.TextReader r, IDictionary<string,Int> termFreqMap, System.String fieldName)
         {
             TokenStream ts = analyzer.TokenStream(fieldName, r);
 			int tokenCount=0;
 			// for every token
-			TermAttribute termAtt = (TermAttribute) ts.AddAttribute(typeof(TermAttribute));
+            TermAttribute termAtt = ts.AddAttribute<TermAttribute>();
 			
 			while (ts.IncrementToken()) {
 				string word = termAtt.Term();
@@ -862,7 +911,7 @@ namespace Lucene.Net.Search.Similar
 				}
 				
 				// increment frequency
-				Int cnt = (Int) termFreqMap[word];
+				Int cnt = termFreqMap[word];
 				if (cnt == null) {
                     termFreqMap[word] = new Int();
 				}
@@ -923,9 +972,9 @@ namespace Lucene.Net.Search.Similar
         /// </returns>
         /// <seealso cref="RetrieveInterestingTerms(System.IO.TextReader)">
         /// </seealso>
-        public PriorityQueue RetrieveTerms(System.IO.TextReader r)
+        public PriorityQueue<object[]> RetrieveTerms(System.IO.TextReader r)
         {
-            System.Collections.IDictionary words = new System.Collections.Hashtable();
+            IDictionary<string, Int> words = new HashMap<string,Int>();
             for (int i = 0; i < fieldNames.Length; i++)
             {
                 System.String fieldName = fieldNames[i];
@@ -937,8 +986,8 @@ namespace Lucene.Net.Search.Similar
 
         public System.String[] RetrieveInterestingTerms(int docNum)
         {
-            System.Collections.ArrayList al = new System.Collections.ArrayList(maxQueryTerms);
-            PriorityQueue pq = RetrieveTerms(docNum);
+            List<object> al = new List<object>(maxQueryTerms);
+            PriorityQueue<object[]> pq = RetrieveTerms(docNum);
             System.Object cur;
             int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
             // we just want to return the top words
@@ -947,9 +996,9 @@ namespace Lucene.Net.Search.Similar
                 System.Object[] ar = (System.Object[])cur;
                 al.Add(ar[0]); // the 1st entry is the interesting word
             }
-            System.String[] res = new System.String[al.Count];
-            // return (System.String[]) SupportClass.ICollectionSupport.ToArray(al, res);
-            return (System.String[])al.ToArray(typeof(System.String));
+            //System.String[] res = new System.String[al.Count];
+            //return al.toArray(res);
+            return al.Select(x => x.ToString()).ToArray();
         }
 
         /// <summary> Convenience routine to make it easy to return the most interesting words in a document.
@@ -966,8 +1015,8 @@ namespace Lucene.Net.Search.Similar
         /// </seealso>
         public System.String[] RetrieveInterestingTerms(System.IO.TextReader r)
         {
-            System.Collections.ArrayList al = new System.Collections.ArrayList(maxQueryTerms);
-            PriorityQueue pq = RetrieveTerms(r);
+            List<object> al = new List<object>(maxQueryTerms);
+            PriorityQueue<object[]> pq = RetrieveTerms(r);
             System.Object cur;
             int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
             // we just want to return the top words
@@ -976,25 +1025,23 @@ namespace Lucene.Net.Search.Similar
                 System.Object[] ar = (System.Object[])cur;
                 al.Add(ar[0]); // the 1st entry is the interesting word
             }
-            System.String[] res = new System.String[al.Count];
+            //System.String[] res = new System.String[al.Count];
             // return (System.String[]) SupportClass.ICollectionSupport.ToArray(al, res);
-            return (System.String[])al.ToArray(typeof(System.String));
+            return al.Select(x => x.ToString()).ToArray();
         }
 
         /// <summary> PriorityQueue that orders words by score.</summary>
-        private class FreqQ : PriorityQueue
+        private class FreqQ : PriorityQueue<object[]>
         {
             internal FreqQ(int s)
             {
                 Initialize(s);
             }
 
-            override public bool LessThan(System.Object a, System.Object b)
+            override public bool LessThan(System.Object[] aa, System.Object[] bb)
             {
-                System.Object[] aa = (System.Object[])a;
-                System.Object[] bb = (System.Object[])b;
-                System.Single fa = (System.Single)aa[2];
-                System.Single fb = (System.Single)bb[2];
+                float fa = (float)aa[2];
+                float fb = (float)bb[2];
                 return (float)fa > (float)fb;
             }
         }

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThisQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThisQuery.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThisQuery.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/MoreLikeThisQuery.cs Wed Nov 23 06:55:21 2011
@@ -35,15 +35,13 @@ namespace Lucene.Net.Search.Similar
  */
     public class MoreLikeThisQuery : Query
     {
-
-
         private String likeText;
         private String[] moreLikeFields;
         private Analyzer analyzer;
         float percentTermsToMatch = 0.3f;
         int minTermFrequency = 1;
         int maxQueryTerms = 5;
-        System.Collections.Hashtable stopWords = null;
+        ISet<string> stopWords = null;
         int minDocFreq = -1;
 
 
@@ -144,11 +142,11 @@ namespace Lucene.Net.Search.Similar
         {
             this.moreLikeFields = moreLikeFields;
         }
-        public System.Collections.Hashtable GetStopWords()
+        public ISet<string> GetStopWords()
         {
             return stopWords;
         }
-        public void SetStopWords(System.Collections.Hashtable stopWords)
+        public void SetStopWords(ISet<string> stopWords)
         {
             this.stopWords = stopWords;
         }

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/SimilarityQueries.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/SimilarityQueries.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/SimilarityQueries.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/Similar/SimilarityQueries.cs Wed Nov 23 06:55:21 2011
@@ -16,6 +16,7 @@
  */
 
 using System;
+using System.Collections.Generic;
 using Analyzer = Lucene.Net.Analysis.Analyzer;
 using TokenStream = Lucene.Net.Analysis.TokenStream;
 using Term = Lucene.Net.Index.Term;
@@ -82,13 +83,13 @@ namespace Similarity.Net
         /// <returns> a query with all unique words in 'body'
         /// </returns>
         /// <throws>  IOException this can't happen... </throws>
-        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
+        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, ISet<string> stop)
         {
             TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
-            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
+            TermAttribute termAtt = ts.AddAttribute<TermAttribute>();
 
             BooleanQuery tmp = new BooleanQuery();
-            System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
+            ISet<string> already = new HashSet<string>(); // ignore dups
             while (ts.IncrementToken())
             {
                 String word = termAtt.Term();
@@ -96,9 +97,9 @@ namespace Similarity.Net
                 if (stop != null && stop.Contains(word))
                     continue;
                 // ignore dups
-                if (already.Contains(word) == true)
+                if (already.Contains(word))
                     continue;
-                already.Add(word, word);
+                already.Add(word);
                 // add to query
                 TermQuery tq = new TermQuery(new Term(field, word));
                 try

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/TermsFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/TermsFilter.cs?rev=1205303&r1=1205302&r2=1205303&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/TermsFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Queries/TermsFilter.cs Wed Nov 23 06:55:21 2011
@@ -33,7 +33,7 @@ namespace Lucene.Net.Search
         /// <summary>
         /// The set of terms for this filter.
         /// </summary>
-        protected HashSet<Term> terms = new HashSet<Term>();
+        protected ISet<Term> terms = new SortedSet<Term>();
 
         /// <summary>
         /// Add a term to the set.
@@ -83,6 +83,7 @@ namespace Lucene.Net.Search
                 return false;
             }
             TermsFilter test = (TermsFilter)obj;
+            // TODO: Does SortedSet have any issues like List<T>?  See EquatableList in Support
             return (terms == test.terms || (terms != null && terms.Equals(test.terms)));
         }