You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2010/05/30 16:20:28 UTC
svn commit: r949519 [1/2] - in
/lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net: SpellChecker.Net/
SpellChecker.Net/Spell/ Test/ Test/Test/
Author: digy
Date: Sun May 30 14:20:28 2010
New Revision: 949519
URL: http://svn.apache.org/viewvc?rev=949519&view=rev
Log:
LUCENENET-366 Spellchecker issues
(SpellChecker 2.9.2)
Added:
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/JaroWinklerDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LevenshteinDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/NGramDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/StringDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.csproj
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.csproj
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestJaroWinklerDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLevenshteinDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs
Removed:
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net-2.0.0.csproj
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test-2.0.0.csproj
Modified:
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/AssemblyInfo.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/Dictionary.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LuceneDictionary.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/SpellChecker.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/TRStringDistance.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.sln
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/AssemblyInfo.cs
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.sln
lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/AssemblyInfo.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/AssemblyInfo.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/AssemblyInfo.cs Sun May 30 14:20:28 2010
@@ -14,7 +14,7 @@ using System.Runtime.CompilerServices;
[assembly: AssemblyDefaultAlias("Lucene.Net.SpellChecker")]
[assembly: AssemblyCulture("")]
-[assembly: AssemblyInformationalVersionAttribute("2.0")]
+[assembly: AssemblyInformationalVersionAttribute("2.9")]
// Version information for an assembly consists of the following four values:
//
@@ -26,7 +26,7 @@ using System.Runtime.CompilerServices;
// You can specify all the values or you can default the Revision and Build Numbers
// by using the '*' as shown below:
-[assembly: AssemblyVersion("2.0.0.2")]
+[assembly: AssemblyVersion("2.9.2.1")]
//
// In order to sign your assembly you must specify a key to use. Refer to the
@@ -58,3 +58,5 @@ using System.Runtime.CompilerServices;
[assembly: AssemblyKeyName("")]
+
+//[assembly: System.Runtime.CompilerServices.InternalsVisibleTo("SpellcheckTests")]
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/Dictionary.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/Dictionary.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/Dictionary.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/Dictionary.cs Sun May 30 14:20:28 2010
@@ -25,13 +25,9 @@ namespace SpellChecker.Net.Search.Spell
{
/// <summary> A simple interface representing a Dictionary</summary>
- /// <author> Nicolas Maisonneuve
- /// </author>
- /// <version> 1.0
- /// </version>
public interface Dictionary
{
- /// <summary> return all the words present in the dictionnary</summary>
+ /// <summary> return all the words present in the dictionary</summary>
/// <returns> Iterator
/// </returns>
System.Collections.IEnumerator GetWordsIterator();
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/JaroWinklerDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/JaroWinklerDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/JaroWinklerDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/JaroWinklerDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace SpellChecker.Net.Search.Spell
+{
+ public class JaroWinklerDistance : StringDistance // similarity from matching characters, transpositions and a common-prefix bonus
+ {
+ private float threshold = 0.7f; // Jaro scores below this value get no prefix bonus in GetDistance
+
+ private int[] Matches(String s1, String s2) // returns { matches, transpositions / 2, common prefix length, longer length }
+ {
+ String Max, Min; // the longer and the shorter input; on equal length s2 is Max
+ if (s1.Length > s2.Length)
+ {
+ Max = s1;
+ Min = s2;
+ }
+ else
+ {
+ Max = s2;
+ Min = s1;
+ }
+ int range = Math.Max(Max.Length / 2 - 1, 0); // how far from its own index a character may find its match
+ int[] matchIndexes = new int[Min.Length]; // per char of Min: index of its match in Max, or -1
+ for (int i = 0; i < matchIndexes.Length; i++)
+ matchIndexes[i] = -1;
+ bool[] matchFlags = new bool[Max.Length]; // chars of Max already claimed by a match
+ int matches = 0;
+ for (int mi = 0; mi < Min.Length; mi++)
+ {
+ char c1 = Min[mi];
+ for (int xi = Math.Max(mi - range, 0), xn = Math.Min(mi + range + 1, Max
+ .Length); xi < xn; xi++)
+ {
+ if (!matchFlags[xi] && c1 == Max[xi]) // first unclaimed equal char inside the window wins
+ {
+ matchIndexes[mi] = xi;
+ matchFlags[xi] = true;
+ matches++;
+ break;
+ }
+ }
+ }
+ char[] ms1 = new char[matches]; // matched chars in the order they occur in Min
+ char[] ms2 = new char[matches]; // matched chars in the order they occur in Max
+ for (int i = 0, si = 0; i < Min.Length; i++)
+ {
+ if (matchIndexes[i] != -1)
+ {
+ ms1[si] = Min[i];
+ si++;
+ }
+ }
+ for (int i = 0, si = 0; i < Max.Length; i++)
+ {
+ if (matchFlags[i])
+ {
+ ms2[si] = Max[i];
+ si++;
+ }
+ }
+ int transpositions = 0; // positions where the two matched sequences disagree
+ for (int mi = 0; mi < ms1.Length; mi++)
+ {
+ if (ms1[mi] != ms2[mi])
+ {
+ transpositions++;
+ }
+ }
+ int prefix = 0; // length of the common leading run of s1 and s2
+ for (int mi = 0; mi < Min.Length; mi++)
+ {
+ if (s1[mi] == s2[mi])
+ {
+ prefix++;
+ }
+ else
+ {
+ break;
+ }
+ }
+ return new int[] { matches, transpositions / 2, prefix, Max.Length }; // mismatch count is halved with integer division
+ }
+
+ public float GetDistance(String s1, String s2) // 0 when nothing matches; else the Jaro score, plus the prefix bonus once it reaches the threshold
+ {
+ int[] mtp = Matches(s1, s2);
+ float m = (float)mtp[0];
+ if (m == 0)
+ {
+ return 0f; // no matching characters; also avoids dividing by m below
+ }
+ float j = ((m / s1.Length + m / s2.Length + (m - mtp[1]) / m)) / 3; // mean of: matched share of s1, matched share of s2, share of matches not transposed
+ float jw = j < GetThreshold() ? j : j + Math.Min(0.1f, 1f / mtp[3]) * mtp[2] // per-char bonus is at most 1 / longer length, so bonus * prefix cannot exceed 1
+ * (1 - j);
+ return jw;
+ }
+
+ /// <summary>
+ /// Sets the threshold used to determine when the Winkler bonus should be used.
+ /// Set to a negative value to get the Jaro distance.
+ /// </summary>
+ /// <param name="threshold">the new value of the threshold</param>
+ public void SetThreshold(float threshold)
+ {
+ this.threshold = threshold;
+ }
+
+ /// <summary>
+ /// Returns the current value of the threshold used for adding the Winkler bonus.
+ /// The default value is 0.7.
+ /// </summary>
+ /// <returns>the current value of the threshold</returns>
+ public float GetThreshold()
+ {
+ return threshold;
+ }
+
+ }
+}
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LevenshteinDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LevenshteinDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LevenshteinDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LevenshteinDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+using SpellChecker.Net.Search.Spell;
+
+namespace SpellChecker.Net.Search.Spell
+{
+ /// <summary>
+ /// Levenshtein edit distance, reported as a similarity between 0 and 1 (see GetDistance)
+ /// </summary>
+ public class LevenshteinDistance : StringDistance
+ {
+ /// <summary>
+ /// Returns a float between 0 and 1 based on how similar the specified strings are to one another.
+ /// Returning a value of 1 means the specified strings are identical and 0 means the
+ /// strings are maximally different.
+ /// </summary>
+ /// <param name="target">The first string.</param>
+ /// <param name="other">The second string.</param>
+ /// <returns>a float between 0 and 1 based on how similar the specified strings are to one another.</returns>
+ public float GetDistance(String target, String other)
+ {
+ char[] sa;
+ int n;
+ int[] p; // 'previous' cost array, horizontally
+ int[] d; // cost array, horizontally
+ int[] _d; // placeholder to assist in swapping p and d
+
+ /*
+ The difference between this impl. and the previous is that, rather
+ than creating and retaining a matrix of size target.Length+1 by other.Length+1,
+ we maintain two single-dimensional arrays of length target.Length+1. The first, d,
+ is the 'current working' distance array that maintains the newest distance cost
+ counts as we iterate through the characters of target. Each time we increment
+ the index into other that we are comparing, d is copied to p, the second int[]. Doing so
+ allows us to retain the previous cost counts as required by the algorithm (taking
+ the minimum of the cost count to the left, up one, and diagonally up and to the left
+ of the current cost count being calculated). (Note that the arrays aren't really
+ copied anymore, just switched...this is clearly much better than cloning an array
+ or doing an Array.Copy() each time through the outer loop.)
+
+ Effectively, the difference between the two implementations is this one does not
+ cause an out of memory condition when calculating the LD over two very large strings.
+ */
+
+ sa = target.ToCharArray();
+ n = sa.Length;
+ p = new int[n + 1];
+ d = new int[n + 1];
+ int m = other.Length;
+
+ if (n == 0 || m == 0) // an empty string is only similar to another empty string
+ {
+ if (n == m)
+ {
+ return 1; // both empty
+ }
+ else
+ {
+ return 0; // exactly one empty
+ }
+ }
+
+
+ // indexes into target and other
+ int i; // iterates through target
+ int j; // iterates through other
+
+ char t_j; // jth character of other
+
+ int cost; // substitution cost: 0 when the characters are equal, 1 otherwise
+
+ for (i = 0; i <= n; i++)
+ {
+ p[i] = i;
+ }
+
+ for (j = 1; j <= m; j++)
+ {
+ t_j = other[j - 1];
+ d[0] = j;
+
+ for (i = 1; i <= n; i++)
+ {
+ cost = sa[i - 1] == t_j ? 0 : 1;
+ // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
+ d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
+ }
+
+ // swap the rows: the current distance counts become the 'previous row' counts
+ _d = p;
+ p = d;
+ d = _d;
+ }
+
+ // our last action in the above loop was to switch d and p, so p now
+ // actually has the most recent cost counts
+ return 1.0f - ((float)p[n] / Math.Max(other.Length, sa.Length)); // edit distance scaled by the longer length, inverted so 1 means identical
+ }
+ }
+}
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LuceneDictionary.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LuceneDictionary.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LuceneDictionary.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/LuceneDictionary.cs Sun May 30 14:20:28 2010
@@ -22,18 +22,11 @@ using Term = Lucene.Net.Index.Term;
namespace SpellChecker.Net.Search.Spell
{
-
/// <summary> Lucene Dictionary
///
/// </summary>
- /// <author> Nicolas Maisonneuve
- /// </author>
public class LuceneDictionary : Dictionary
{
- virtual public System.Collections.IEnumerator GetWordsIterator()
- {
- return new LuceneIterator(this);
- }
internal IndexReader reader;
internal System.String field;
@@ -42,88 +35,84 @@ namespace SpellChecker.Net.Search.Spell
this.reader = reader;
this.field = field;
}
+
+ virtual public System.Collections.IEnumerator GetWordsIterator()
+ {
+ return new LuceneIterator(this);
+ }
+
+ public System.Collections.IEnumerator GetEnumerator()
+ {
+ return GetWordsIterator();
+ }
internal sealed class LuceneIterator : System.Collections.IEnumerator
{
- private void InitBlock(LuceneDictionary enclosingInstance)
- {
- this.enclosingInstance = enclosingInstance;
- }
- private LuceneDictionary enclosingInstance;
- public System.Object Current
- {
- get
- {
- if (!has_next_called)
- {
- MoveNext();
- }
- has_next_called = false;
- return (actualTerm != null) ? actualTerm.Text() : null;
- }
-
- }
- public LuceneDictionary Enclosing_Instance
- {
- get
- {
- return enclosingInstance;
- }
-
- }
private TermEnum termEnum;
private Term actualTerm;
- private bool has_next_called;
+ private bool hasNextCalled;
+
+ private LuceneDictionary enclosingInstance;
public LuceneIterator(LuceneDictionary enclosingInstance)
{
- InitBlock(enclosingInstance);
+ this.enclosingInstance = enclosingInstance;
try
{
- termEnum = Enclosing_Instance.reader.Terms(new Term(Enclosing_Instance.field, ""));
+ termEnum = enclosingInstance.reader.Terms(new Term(enclosingInstance.field, ""));
}
catch (System.IO.IOException ex)
{
System.Console.Error.WriteLine(ex.StackTrace);
}
}
-
-
- public bool MoveNext()
+
+ //next()
+ public System.Object Current
{
- has_next_called = true;
- try
+ get
{
- // if there is still words
- if (!termEnum.Next())
+ if (!hasNextCalled)
{
- actualTerm = null;
- return false;
- }
- // if the next word are in the field
- actualTerm = termEnum.Term();
- System.String fieldt = actualTerm.Field();
- if ( fieldt != Enclosing_Instance.field)
- {
- actualTerm = null;
- return false;
+ MoveNext();
}
- return true;
+ hasNextCalled = false;
+ return (actualTerm != null) ? actualTerm.Text() : null;
}
- catch (System.IO.IOException ex)
+
+ }
+
+ //hasNext()
+ public bool MoveNext()
+ {
+ hasNextCalled = true;
+
+ actualTerm = termEnum.Term();
+
+ // if there are no words return false
+ if (actualTerm == null) return false;
+
+ System.String fieldt = actualTerm.Field();
+ termEnum.Next();
+
+ // if the next word doesn't have the same field return false
+ if (fieldt != enclosingInstance.field)
{
- System.Console.Error.WriteLine(ex.StackTrace);
+ actualTerm = null;
return false;
}
+ return true;
}
-
- public void Remove()
+
+ public void Remove()
{
+ throw new NotImplementedException();
}
-
- public void Reset()
+
+ public void Reset()
{
+ throw new NotImplementedException();
}
}
}
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/NGramDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/NGramDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/NGramDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/NGramDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace SpellChecker.Net.Search.Spell
+{
+ public class NGramDistance : StringDistance // edit-distance style similarity that compares n-grams rather than single characters
+ {
+ private int n; // n-gram size
+
+ /// <summary>
+ /// Creates an N-Gram distance measure using n-grams of the specified size.
+ /// </summary>
+ /// <param name="size">The size of the n-gram to be used to compute the string distance.</param>
+ public NGramDistance(int size)
+ {
+ this.n = size;
+ }
+
+ /// <summary>
+ /// Creates an N-Gram distance measure using n-grams of size 2.
+ /// </summary>
+ public NGramDistance()
+ : this(2)
+ {
+ }
+
+ public float GetDistance(String source, String target) // returns a similarity: 1 for two empty strings, 0 when exactly one is empty
+ {
+ int sl = source.Length;
+ int tl = target.Length;
+
+ if (sl == 0 || tl == 0)
+ {
+ if (sl == tl)
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ int cost = 0;
+ if (sl < n || tl < n) // at least one string is shorter than a single n-gram
+ {
+ for (int ii = 0, ni = Math.Min(sl, tl); ii < ni; ii++)
+ {
+ if (source[ii] == target[ii])
+ {
+ cost++;
+ }
+ }
+ return (float)cost / Math.Max(sl, tl); // here cost counts equal positions, so the ratio is already a similarity
+ }
+
+ char[] sa = new char[sl + n - 1]; // source preceded by n - 1 padding chars
+ float[] p; // 'previous' cost array, horizontally
+ float[] d; // cost array, horizontally
+ float[] _d; // placeholder to assist in swapping p and d
+
+ // construct sa with prefix
+ for (int ii = 0; ii < sa.Length; ii++)
+ {
+ if (ii < n - 1)
+ {
+ sa[ii] = (char)0; //add prefix
+ }
+ else
+ {
+ sa[ii] = source[ii - n + 1];
+ }
+ }
+ p = new float[sl + 1];
+ d = new float[sl + 1];
+
+ // indexes into source and target
+ int i; // iterates through source
+ int j; // iterates through target
+
+ char[] t_j = new char[n]; // jth n-gram of target
+
+ for (i = 0; i <= sl; i++)
+ {
+ p[i] = i;
+ }
+
+ for (j = 1; j <= tl; j++)
+ {
+ // construct t_j n-gram
+ if (j < n) // fewer than n chars of target consumed so far: left-pad the n-gram with (char)0
+ {
+ for (int ti = 0; ti < n - j; ti++)
+ {
+ t_j[ti] = (char)0; //add prefix
+ }
+ for (int ti = n - j; ti < n; ti++)
+ {
+ t_j[ti] = target[ti - (n - j)];
+ }
+ }
+ else
+ {
+ t_j = target.Substring(j - n, n).ToCharArray();
+ }
+ d[0] = j;
+ for (i = 1; i <= sl; i++)
+ {
+ cost = 0;
+ int tn = n; // number of positions that count toward the average
+ // compare the n-gram of sa starting at i - 1 to t_j
+ for (int ni = 0; ni < n; ni++)
+ {
+ if (sa[i - 1 + ni] != t_j[ni])
+ {
+ cost++;
+ }
+ else if (sa[i - 1 + ni] == 0)
+ { //discount matches on prefix
+ tn--;
+ }
+ }
+ float ec = (float)cost / tn; // share of differing positions; NOTE(review): tn can reach 0 only if source itself contains (char)0 - confirm callers never pass that
+ // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
+ d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1] + ec);
+ }
+ // swap the rows: the current distance counts become the 'previous row' counts
+ _d = p;
+ p = d;
+ d = _d;
+ }
+
+ // our last action in the above loop was to switch d and p, so p now
+ // actually has the most recent cost counts
+ return 1.0f - ((float)p[sl] / Math.Max(tl, sl)); // accumulated cost scaled by the longer length, inverted so 1 means identical
+ }
+
+
+ }
+}
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/SpellChecker.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/SpellChecker.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/SpellChecker.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/SpellChecker.cs Sun May 30 14:20:28 2010
@@ -30,11 +30,12 @@ using IndexSearcher = Lucene.Net.Search.
using Query = Lucene.Net.Search.Query;
using TermQuery = Lucene.Net.Search.TermQuery;
using Directory = Lucene.Net.Store.Directory;
+using SpellChecker.Net.Search.Spell;
+using Lucene.Net.Store;
+using Lucene.Net.Search;
namespace SpellChecker.Net.Search.Spell
{
-
-
/// <summary> <p>
/// Spell Checker class (Main class) <br/>
/// (initially inspired by the David Spencer code).
@@ -58,38 +59,117 @@ namespace SpellChecker.Net.Search.Spell
/// </version>
public class SpellChecker
{
- virtual public void SetSpellIndex(Directory spellindex)
- {
- this.spellindex = spellindex;
- }
- /// <summary> Set the accuracy 0 < min < 1; default 0.5</summary>
- virtual public void SetAccuracy(float minScore)
- {
- this.minScore = minScore;
- }
-
/// <summary> Field name for each word in the ngram index.</summary>
public const System.String F_WORD = "word";
-
-
+ private readonly Term F_WORD_TERM = new Term(F_WORD);
+
/// <summary> the spell index</summary>
internal Directory spellindex;
-
+
/// <summary> Boost value for start and end grams</summary>
private float bStart = 2.0f;
private float bEnd = 1.0f;
-
-
- private IndexReader reader;
+
+ //private IndexReader reader;
+ // don't use this searcher directly - see #swapSearcher()
+ private IndexSearcher searcher;
+
+ /// <summary>
+ /// this locks all modifications to the current searcher.
+ /// </summary>
+ private static System.Object searcherLock = new System.Object();
+
+ /*
+ * this lock synchronizes all possible modifications to the
+ * current index directory. It should not be possible to try modifying
+ * the same index concurrently. Note: Do not acquire the searcher lock
+ * before acquiring this lock!
+ */
+ private static System.Object modifyCurrentIndexLock = new System.Object();
+ private volatile bool closed = false;
+
internal float minScore = 0.5f; //LUCENENET-359 Spellchecker accuracy gets overwritten
-
-
- public SpellChecker(Directory gramIndex)
+
+ private StringDistance sd;
+
+ /// <summary>
+ /// Use the given directory as a spell checker index. The directory
+ /// is created if it doesn't exist yet.
+ /// </summary>
+ /// <param name="gramIndex">the spell index directory</param>
+ /// <param name="sd">the <see cref="StringDistance"/> measurement to use</param>
+ public SpellChecker(Directory gramIndex, StringDistance sd)
{
this.SetSpellIndex(gramIndex);
+ this.setStringDistance(sd);
+ }
+
+ /// <summary>
+ /// Use the given directory as a spell checker index with a
+ /// <see cref="LevenshteinDistance"/> as the default <see cref="StringDistance"/>. The
+ /// directory is created if it doesn't exist yet.
+ /// </summary>
+ /// <param name="gramIndex">the spell index directory</param>
+ public SpellChecker(Directory gramIndex)
+ : this(gramIndex, new LevenshteinDistance())
+ { }
+
+ /// <summary>
+ /// Use a different index as the spell checker index or re-open
+ /// the existing index if <paramref name="spellIndexDir"/> is the same value
+ /// as given in the constructor.
+ /// </summary>
+ /// <param name="spellIndexDir">the spell directory to use</param>
+ /// <exception cref="AlreadyClosedException">if the SpellChecker is already closed</exception>
+ /// <exception cref="System.IO.IOException">if the SpellChecker cannot open the directory</exception>
+ virtual public void SetSpellIndex(Directory spellIndexDir)
+ {
+ // this could be the same directory as the current spellIndex
+ // modifications to the directory should be synchronized
+ lock (modifyCurrentIndexLock)
+ {
+ EnsureOpen();
+ if (!IndexReader.IndexExists(spellIndexDir))
+ {
+ IndexWriter writer = new IndexWriter(spellIndexDir, null, true,
+ IndexWriter.MaxFieldLength.UNLIMITED);
+ writer.Close();
+ }
+ SwapSearcher(spellIndexDir);
+ }
+ }
+
+ /// <summary>
+ /// Sets the <see cref="StringDistance"/> implementation for this
+ /// <see cref="SpellChecker"/> instance.
+ /// </summary>
+ /// <param name="sd">the <see cref="StringDistance"/> implementation for this
+ /// <see cref="SpellChecker"/> instance.</param>
+ public void setStringDistance(StringDistance sd)
+ {
+ this.sd = sd;
+ }
+
+ /// <summary>
+ /// Returns the <see cref="StringDistance"/> instance used by this
+ /// <see cref="SpellChecker"/> instance.
+ /// </summary>
+ /// <returns>
+ /// Returns the <see cref="StringDistance"/> instance used by this
+ /// <see cref="SpellChecker"/> instance.
+ /// </returns>
+ public StringDistance GetStringDistance()
+ {
+ return sd;
+ }
+
+
+ /// <summary> Set the accuracy 0 &lt; min &lt; 1; default 0.5</summary>
+ virtual public void SetAccuracy(float minScore)
+ {
+ this.minScore = minScore;
}
-
-
+
/// <summary> Suggest similar words</summary>
/// <param name="word">String the word you want a spell check done on
/// </param>
@@ -102,12 +182,12 @@ namespace SpellChecker.Net.Search.Spell
{
return this.SuggestSimilar(word, num_sug, null, null, false);
}
-
-
+
+
/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
/// <param name="word">String the word you want a spell check done on
/// </param>
- /// <param name="num_sug">int the number of suggest words
+ /// <param name="numSug">int the number of suggested words
/// </param>
/// <param name="ir">the indexReader of the user index (can be null see field param)
/// </param>
@@ -122,120 +202,133 @@ namespace SpellChecker.Net.Search.Spell
/// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
/// of the suggest words in the field of the user index
/// </returns>
- public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
- {
- float min = this.minScore;
- TRStringDistance sd = new TRStringDistance(word);
- int lengthWord = word.Length;
-
- int goalFreq = (morePopular && ir != null) ? ir.DocFreq(new Term(field, word)) : 0;
- if (!morePopular && goalFreq > 0)
- {
- return new System.String[]{word}; // return the word if it exist in the index and i don't want a more popular word
- }
-
- BooleanQuery query = new BooleanQuery();
- System.String[] grams;
- System.String key;
-
- for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
- {
-
- key = "gram" + ng; // form key
-
- grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
-
- if (grams.Length == 0)
- {
- continue; // hmm
- }
-
- if (bStart > 0)
- {
- // should we boost prefixes?
- Add(query, "start" + ng, grams[0], bStart); // matches start of word
- }
- if (bEnd > 0)
- {
- // should we boost suffixes
- Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
- }
- for (int i = 0; i < grams.Length; i++)
- {
- Add(query, key, grams[i]);
- }
- }
-
- IndexSearcher searcher = new IndexSearcher(this.spellindex);
- Hits hits = searcher.Search(query);
- SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
-
- int stop = Math.Min(hits.Length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers
- SuggestWord sugword = new SuggestWord();
- for (int i = 0; i < stop; i++)
+ public virtual System.String[] SuggestSimilar(System.String word, int numSug, IndexReader ir, System.String field, bool morePopular)
+ { // obtainSearcher calls ensureOpen
+ IndexSearcher indexSearcher = ObtainSearcher();
+ try
{
-
- sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word)
-
- if (sugword.string_Renamed.Equals(word))
+ float min = this.minScore;
+ int lengthWord = word.Length;
+
+ int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
+ int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
+ // if the word exists in the real index and we don't care for word frequency, return the word itself
+ if (!morePopular && freq > 0)
{
- continue; // don't suggest a word for itself, that would be silly
+ return new String[] { word };
}
-
- //edit distance/normalize with the min word length
- sugword.score = 1.0f - ((float) sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
- if (sugword.score < min)
+
+ BooleanQuery query = new BooleanQuery();
+ String[] grams;
+ String key;
+
+ for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
{
- continue;
+
+ key = "gram" + ng; // form key
+
+ grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
+
+ if (grams.Length == 0)
+ {
+ continue; // hmm
+ }
+
+ if (bStart > 0)
+ { // should we boost prefixes?
+ Add(query, "start" + ng, grams[0], bStart); // matches start of word
+
+ }
+ if (bEnd > 0)
+ { // should we boost suffixes
+ Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
+
+ }
+ for (int i = 0; i < grams.Length; i++)
+ {
+ Add(query, key, grams[i]);
+ }
}
-
- if (ir != null)
+
+ int maxHits = 10 * numSug;
+
+ // System.out.println("Q: " + query);
+ ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).scoreDocs;
+ // System.out.println("HITS: " + hits.length());
+ SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
+
+ // go through more than numSug matches (up to maxHits) in case the distance filter rejects some
+ int stop = Math.Min(hits.Length, maxHits);
+ SuggestWord sugWord = new SuggestWord();
+ for (int i = 0; i < stop; i++)
{
- // use the user index
- sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
- if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
+
+ sugWord.string_Renamed = indexSearcher.Doc(hits[i].doc).Get(F_WORD); // get orig word
+
+ // don't suggest a word for itself, that would be silly
+ if (sugWord.string_Renamed.Equals(word))
+ {
+ continue;
+ }
+
+ // edit distance
+ sugWord.score = sd.GetDistance(word, sugWord.string_Renamed);
+ if (sugWord.score < min)
{
- // don't suggest a word that is not present in the field
continue;
}
+
+ if (ir != null && field != null)
+ { // use the user index
+ sugWord.freq = ir.DocFreq(new Term(field, sugWord.string_Renamed)); // freq in the index
+ // don't suggest a word that is not present in the field
+ if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1)
+ {
+ continue;
+ }
+ }
+ sugQueue.InsertWithOverflow(sugWord);
+ if (sugQueue.Size() == numSug)
+ {
+ // if queue full, maintain the minScore score
+ min = ((SuggestWord)sugQueue.Top()).score;
+ }
+ sugWord = new SuggestWord();
}
- sugqueue.Insert(sugword);
- if (sugqueue.Size() == num_sug)
+
+ // convert to array string
+ String[] list = new String[sugQueue.Size()];
+ for (int i = sugQueue.Size() - 1; i >= 0; i--)
{
- //if queue full , maintain the min score
- min = ((SuggestWord) sugqueue.Top()).score;
+ list[i] = ((SuggestWord)sugQueue.Pop()).string_Renamed;
}
- sugword = new SuggestWord();
+
+ return list;
}
-
- // convert to array string
- System.String[] list = new System.String[sugqueue.Size()];
- for (int i = sugqueue.Size() - 1; i >= 0; i--)
+ finally
{
- list[i] = ((SuggestWord) sugqueue.Pop()).string_Renamed;
+ ReleaseSearcher(indexSearcher);
}
-
- searcher.Close();
- return list;
+
}
-
-
+
+
/// <summary> Add a clause to a boolean query.</summary>
- private static void Add(BooleanQuery q, System.String k, System.String v, float boost)
+ private static void Add(BooleanQuery q, System.String k, System.String v, float boost)
{
Query tq = new TermQuery(new Term(k, v));
tq.SetBoost(boost);
q.Add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
}
-
-
+
+
/// <summary> Add a clause to a boolean query.</summary>
- private static void Add(BooleanQuery q, System.String k, System.String v)
+ private static void Add(BooleanQuery q, System.String k, System.String v)
{
q.Add(new BooleanClause(new TermQuery(new Term(k, v)), BooleanClause.Occur.SHOULD));
}
-
-
+
+
/// <summary> Form all ngrams for a given word.</summary>
/// <param name="text">the word to parse
/// </param>
@@ -253,16 +346,23 @@ namespace SpellChecker.Net.Search.Spell
}
return res;
}
-
-
- public virtual void ClearIndex()
- {
- IndexReader.Unlock(spellindex);
- IndexWriter writer = new IndexWriter(spellindex, null, true);
- writer.Close();
+
+ /// <summary>
+ /// Removes all terms from the spell check index.
+ /// </summary>
+ public virtual void ClearIndex()
+ {
+ lock (modifyCurrentIndexLock)
+ {
+ EnsureOpen();
+ Directory dir = this.spellindex;
+ IndexWriter writer = new IndexWriter(dir, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
+ writer.Close();
+ SwapSearcher(dir);
+ }
}
-
-
+
+
/// <summary> Check whether the word exists in the index.</summary>
/// <param name="word">String
/// </param>
@@ -271,56 +371,74 @@ namespace SpellChecker.Net.Search.Spell
/// </returns>
public virtual bool Exist(System.String word)
{
- if (reader == null)
+ // obtainSearcher calls ensureOpen
+ IndexSearcher indexSearcher = ObtainSearcher();
+ try
+ {
+ return indexSearcher.DocFreq(F_WORD_TERM.CreateTerm(word)) > 0;
+ }
+ finally
{
- reader = IndexReader.Open(spellindex);
+ ReleaseSearcher(indexSearcher);
}
- return reader.DocFreq(new Term(F_WORD, word)) > 0;
}
-
-
+
+
/// <summary> Index a Dictionary</summary>
- /// <param name="dict">the dictionary to index
- /// </param>
+ /// <param name="dict">the dictionary to index</param>
+ /// <param name="mergeFactor">mergeFactor to use when indexing</param>
+ /// <param name="ramMB">the max amount of memory in MB to use</param>
/// <throws> IOException </throws>
- public virtual void IndexDictionary(Dictionary dict)
+ /// <throws>AlreadyClosedException if the Spellchecker is already closed</throws>
+ public virtual void IndexDictionary(Dictionary dict, int mergeFactor, int ramMB)
+ {
+ lock (modifyCurrentIndexLock)
+ {
+ EnsureOpen();
+ Directory dir = this.spellindex;
+ IndexWriter writer = new IndexWriter(spellindex, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ writer.SetMergeFactor(mergeFactor);
+ writer.SetMaxBufferedDocs(ramMB);
+
+ System.Collections.IEnumerator iter = dict.GetWordsIterator();
+ while (iter.MoveNext())
+ {
+ System.String word = (System.String)iter.Current;
+
+ int len = word.Length;
+ if (len < 3)
+ {
+ continue; // too short we bail but "too long" is fine...
+ }
+
+ if (this.Exist(word))
+ {
+ // if the word already exists in the gramindex
+ continue;
+ }
+
+ // ok index the word
+ Document doc = CreateDocument(word, GetMin(len), GetMax(len));
+ writer.AddDocument(doc);
+ }
+ // close writer
+ writer.Optimize();
+ writer.Close();
+ // also re-open the spell index to see our own changes when the next suggestion
+ // is fetched:
+ SwapSearcher(dir);
+ }
+ }
+
+ /// <summary>
+ /// Indexes the data from the given <see cref="Dictionary"/>.
+ /// </summary>
+ /// <param name="dict">the dictionary to index</param>
+ public void IndexDictionary(Dictionary dict)
{
- IndexReader.Unlock(spellindex);
- IndexWriter writer = new IndexWriter(spellindex, new WhitespaceAnalyzer(), !IndexReader.IndexExists(spellindex));
- writer.SetMergeFactor(300);
- writer.SetMaxBufferedDocs(150);
-
- System.Collections.IEnumerator iter = dict.GetWordsIterator();
- while (iter.MoveNext())
- {
- System.String word = (System.String) iter.Current;
-
- int len = word.Length;
- if (len < 3)
- {
- continue; // too short we bail but "too long" is fine...
- }
-
- if (this.Exist(word))
- {
- // if the word already exist in the gramindex
- continue;
- }
-
- // ok index the word
- Document doc = CreateDocument(word, GetMin(len), GetMax(len));
- writer.AddDocument(doc);
- }
- // close writer
- writer.Optimize();
- writer.Close();
-
- // close reader
- reader.Close();
- reader = null;
+ IndexDictionary(dict, 300, 10);
}
-
-
+
private int GetMin(int l)
{
if (l > 5)
@@ -333,8 +451,8 @@ namespace SpellChecker.Net.Search.Spell
}
return 1;
}
-
-
+
+
private int GetMax(int l)
{
if (l > 5)
@@ -347,18 +465,18 @@ namespace SpellChecker.Net.Search.Spell
}
return 2;
}
-
-
+
+
private static Document CreateDocument(System.String text, int ng1, int ng2)
{
Document doc = new Document();
- doc.Add(new Field(F_WORD, text, Field.Store.YES, Field.Index.UN_TOKENIZED)); // orig term
+ doc.Add(new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
AddGram(text, doc, ng1, ng2);
return doc;
}
-
-
- private static void AddGram(System.String text, Document doc, int ng1, int ng2)
+
+
+ private static void AddGram(System.String text, Document doc, int ng1, int ng2)
{
int len = text.Length;
for (int ng = ng1; ng <= ng2; ng++)
@@ -368,28 +486,110 @@ namespace SpellChecker.Net.Search.Spell
for (int i = 0; i < len - ng + 1; i++)
{
System.String gram = text.Substring(i, (i + ng) - (i));
- doc.Add(new Field(key, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.Add(new Field(key, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
if (i == 0)
{
- doc.Add(new Field("start" + ng, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.Add(new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
}
end = gram;
}
if (end != null)
{
// may not be present if len==ng1
- doc.Add(new Field("end" + ng, end, Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.Add(new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED));
}
}
}
-
-
- ~SpellChecker()
+
+ private IndexSearcher ObtainSearcher()
+ {
+ lock (searcherLock)
+ {
+ EnsureOpen();
+ searcher.GetIndexReader().IncRef();
+ return searcher;
+ }
+ }
+
+ private void ReleaseSearcher(IndexSearcher aSearcher)
{
- if (reader != null)
+ // don't check if open - always decRef
+ // don't decrement the private searcher - could have been swapped
+ aSearcher.GetIndexReader().DecRef();
+ }
+
+ private void EnsureOpen()
+ {
+ if (closed)
+ {
+ throw new AlreadyClosedException("Spellchecker has been closed");
+ }
+ }
+
+ public void Close()
+ {
+ lock (searcherLock)
{
- reader.Close();
+ EnsureOpen();
+ closed = true;
+ if (searcher != null)
+ {
+ searcher.Close();
+ }
+ searcher = null;
}
}
+
+ private void SwapSearcher(Directory dir)
+ {
+ /*
+ * opening a searcher is possibly very expensive.
+ * We rather close it again if the Spellchecker was closed during
+ * this operation than block access to the current searcher while opening.
+ */
+ IndexSearcher indexSearcher = CreateSearcher(dir);
+ lock (searcherLock)
+ {
+ if (closed)
+ {
+ indexSearcher.Close();
+ throw new AlreadyClosedException("Spellchecker has been closed");
+ }
+ if (searcher != null)
+ {
+ searcher.Close();
+ }
+ // set the spellindex in the sync block - ensure consistency.
+ searcher = indexSearcher;
+ this.spellindex = dir;
+ }
+ }
+
+ /// <summary>
+ /// Creates a new read-only IndexSearcher (for testing purposes)
+ /// </summary>
+ /// <param name="dir">the directory used to open the searcher</param>
+ /// <returns>a new read-only IndexSearcher. (throws IOException if there is a low-level IO error)</returns>
+ public virtual IndexSearcher CreateSearcher(Directory dir)
+ {
+ return new IndexSearcher(dir, true);
+ }
+
+ /// <summary>
+ /// Returns <code>true</code> if and only if the <see cref="SpellChecker"/> is
+ /// closed, otherwise <code>false</code>.
+ /// </summary>
+ /// <returns><code>true</code> if and only if the <see cref="SpellChecker"/> is
+ /// closed, otherwise <code>false</code>.
+ ///</returns>
+ bool IsClosed()
+ {
+ return closed;
+ }
+
+ ~SpellChecker()
+ {
+ this.Close();
+ }
}
}
\ No newline at end of file
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/StringDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/StringDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/StringDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/StringDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace SpellChecker.Net.Search.Spell
+{
+ /// <summary>
+ /// Interface for string distances.
+ /// </summary>
+ public interface StringDistance
+ {
+ /// <summary>
+ /// Returns a float between 0 and 1 based on how similar the specified strings are to one another.
+ /// Returning a value of 1 means the specified strings are identical and 0 means the
+ /// strings are maximally different.
+ /// </summary>
+ /// <param name="s1">The first string.</param>
+ /// <param name="s2">The second string.</param>
+ /// <returns>a float between 0 and 1 based on how similar the specified strings are to one another.</returns>
+ float GetDistance(String s1, String s2);
+
+ }
+}
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/TRStringDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/Spell/TRStringDistance.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/TRStringDistance.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/Spell/TRStringDistance.cs Sun May 30 14:20:28 2010
@@ -16,12 +16,13 @@
*/
using System;
+using SpellChecker.Net.Search.Spell;
namespace SpellChecker.Net.Search.Spell
{
/// <summary> Edit distance class</summary>
- sealed class TRStringDistance
+ public class TRStringDistance
{
internal char[] sa;
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.csproj
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.csproj?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.csproj (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.csproj Sun May 30 14:20:28 2010
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup>
+ <ProjectType>Local</ProjectType>
+ <ProductVersion>9.0.21022</ProductVersion>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{FF45EE91-9CA3-443D-8231-75E9FA1AF40E}</ProjectGuid>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ApplicationIcon>
+ </ApplicationIcon>
+ <AssemblyKeyContainerName>
+ </AssemblyKeyContainerName>
+ <AssemblyName>SpellChecker.Net</AssemblyName>
+ <AssemblyOriginatorKeyFile>
+ </AssemblyOriginatorKeyFile>
+ <DefaultClientScript>JScript</DefaultClientScript>
+ <DefaultHTMLPageLayout>Grid</DefaultHTMLPageLayout>
+ <DefaultTargetSchema>IE50</DefaultTargetSchema>
+ <DelaySign>false</DelaySign>
+ <OutputType>Library</OutputType>
+ <RootNamespace>SpellChecker.Net</RootNamespace>
+ <RunPostBuildEvent>OnBuildSuccess</RunPostBuildEvent>
+ <StartupObject>
+ </StartupObject>
+ <FileUpgradeFlags>
+ </FileUpgradeFlags>
+ <UpgradeBackupLocation>
+ </UpgradeBackupLocation>
+ <OldToolsVersion>3.5</OldToolsVersion>
+ <TargetFrameworkVersion>v2.0</TargetFrameworkVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <OutputPath>bin\Debug\</OutputPath>
+ <AllowUnsafeBlocks>false</AllowUnsafeBlocks>
+ <BaseAddress>285212672</BaseAddress>
+ <CheckForOverflowUnderflow>false</CheckForOverflowUnderflow>
+ <ConfigurationOverrideFile>
+ </ConfigurationOverrideFile>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <DocumentationFile>SpellChecker.Net.xml</DocumentationFile>
+ <DebugSymbols>true</DebugSymbols>
+ <FileAlignment>4096</FileAlignment>
+ <NoStdLib>false</NoStdLib>
+ <NoWarn>
+ </NoWarn>
+ <Optimize>false</Optimize>
+ <RegisterForComInterop>false</RegisterForComInterop>
+ <RemoveIntegerChecks>false</RemoveIntegerChecks>
+ <TreatWarningsAsErrors>false</TreatWarningsAsErrors>
+ <WarningLevel>4</WarningLevel>
+ <DebugType>full</DebugType>
+ <ErrorReport>prompt</ErrorReport>
+ <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <OutputPath>bin\Release\</OutputPath>
+ <AllowUnsafeBlocks>false</AllowUnsafeBlocks>
+ <BaseAddress>285212672</BaseAddress>
+ <CheckForOverflowUnderflow>false</CheckForOverflowUnderflow>
+ <ConfigurationOverrideFile>
+ </ConfigurationOverrideFile>
+ <DefineConstants>TRACE</DefineConstants>
+ <DocumentationFile>SpellChecker.Net.xml</DocumentationFile>
+ <DebugSymbols>false</DebugSymbols>
+ <FileAlignment>4096</FileAlignment>
+ <NoStdLib>false</NoStdLib>
+ <NoWarn>
+ </NoWarn>
+ <Optimize>true</Optimize>
+ <RegisterForComInterop>false</RegisterForComInterop>
+ <RemoveIntegerChecks>false</RemoveIntegerChecks>
+ <TreatWarningsAsErrors>false</TreatWarningsAsErrors>
+ <WarningLevel>4</WarningLevel>
+ <DebugType>none</DebugType>
+ <ErrorReport>prompt</ErrorReport>
+ <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="Lucene.Net, Version=2.9.2.1, Culture=neutral, processorArchitecture=MSIL">
+ <SpecificVersion>False</SpecificVersion>
+ <HintPath>..\..\..\src\Lucene.Net\bin\Release\Lucene.Net.dll</HintPath>
+ </Reference>
+ <Reference Include="mscorlib">
+ <Name>mscorlib</Name>
+ </Reference>
+ <Reference Include="System">
+ <Name>System</Name>
+ </Reference>
+ <Reference Include="System.Data">
+ <Name>System.Data</Name>
+ </Reference>
+ <Reference Include="System.Design">
+ <Name>System.Design</Name>
+ </Reference>
+ <Reference Include="System.Drawing">
+ <Name>System.Drawing</Name>
+ </Reference>
+ <Reference Include="System.Management">
+ <Name>System.Management</Name>
+ </Reference>
+ <Reference Include="System.Windows.Forms">
+ <Name>System.Windows.Forms</Name>
+ </Reference>
+ <Reference Include="System.Xml">
+ <Name>System.Xml</Name>
+ </Reference>
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="AssemblyInfo.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\Dictionary.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\JaroWinklerDistance.cs" />
+ <Compile Include="Spell\LevenshteinDistance.cs" />
+ <Compile Include="Spell\LuceneDictionary.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\NGramDistance.cs" />
+ <Compile Include="Spell\PlainTextDictionary.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\SpellChecker.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\StringDistance.cs" />
+ <Compile Include="Spell\SuggestWord.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\SuggestWordQueue.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Spell\TRStringDistance.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Content Include="Build.xml" />
+ <Content Include="Spell\Package.html" />
+ </ItemGroup>
+ <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
+ <PropertyGroup>
+ <PreBuildEvent>
+ </PreBuildEvent>
+ <PostBuildEvent>
+ </PostBuildEvent>
+ </PropertyGroup>
+</Project>
\ No newline at end of file
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.sln
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.sln?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.sln (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/SpellChecker.Net/SpellChecker.Net.sln Sun May 30 14:20:28 2010
@@ -1,24 +1,22 @@
-Microsoft Visual Studio Solution File, Format Version 8.00
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SpellChecker.Net-2.0.0", "SpellChecker.Net-2.0.0.csproj", "{FF45EE91-9CA3-443D-8231-75E9FA1AF40E}"
- ProjectSection(ProjectDependencies) = postProject
- EndProjectSection
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual C# Express 2008
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SpellChecker.Net", "SpellChecker.Net.csproj", "{FF45EE91-9CA3-443D-8231-75E9FA1AF40E}"
EndProject
Global
- GlobalSection(DPCodeReviewSolutionGUID) = preSolution
- DPCodeReviewSolutionGUID = {00000000-0000-0000-0000-000000000000}
- EndGlobalSection
- GlobalSection(SolutionConfiguration) = preSolution
- Debug = Debug
- Release = Release
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
EndGlobalSection
- GlobalSection(ProjectConfiguration) = postSolution
- {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Debug.ActiveCfg = Debug|.NET
- {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Debug.Build.0 = Debug|.NET
- {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Release.ActiveCfg = Release|.NET
- {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Release.Build.0 = Release|.NET
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {FF45EE91-9CA3-443D-8231-75E9FA1AF40E}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
EndGlobalSection
- GlobalSection(ExtensibilityAddIns) = postSolution
+ GlobalSection(DPCodeReviewSolutionGUID) = preSolution
+ DPCodeReviewSolutionGUID = {00000000-0000-0000-0000-000000000000}
EndGlobalSection
EndGlobal
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/AssemblyInfo.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/AssemblyInfo.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/AssemblyInfo.cs Sun May 30 14:20:28 2010
@@ -16,7 +16,7 @@ using System.Runtime.CompilerServices;
[assembly: AssemblyDefaultAlias("Lucene.Net.SpellChecker")]
[assembly: AssemblyCulture("")]
-[assembly: AssemblyInformationalVersionAttribute("2.0")]
+[assembly: AssemblyInformationalVersionAttribute("2.9")]
//
// Version information for an assembly consists of the following four values:
@@ -29,7 +29,7 @@ using System.Runtime.CompilerServices;
// You can specify all the values or you can default the Revision and Build Numbers
// by using the '*' as shown below:
-[assembly: AssemblyVersion("2.0.0.1")]
+[assembly: AssemblyVersion("2.9.2.1")]
//
// In order to sign your assembly you must specify a key to use. Refer to the
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.csproj
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.csproj?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.csproj (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.csproj Sun May 30 14:20:28 2010
@@ -0,0 +1,125 @@
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="3.5">
+ <PropertyGroup>
+ <ProjectType>Local</ProjectType>
+ <ProductVersion>8.0.50727</ProductVersion>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}</ProjectGuid>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ApplicationIcon>
+ </ApplicationIcon>
+ <AssemblyKeyContainerName>
+ </AssemblyKeyContainerName>
+ <AssemblyName>SpellChecker.Net.Test</AssemblyName>
+ <AssemblyOriginatorKeyFile>
+ </AssemblyOriginatorKeyFile>
+ <DefaultClientScript>JScript</DefaultClientScript>
+ <DefaultHTMLPageLayout>Grid</DefaultHTMLPageLayout>
+ <DefaultTargetSchema>IE50</DefaultTargetSchema>
+ <DelaySign>false</DelaySign>
+ <OutputType>Library</OutputType>
+ <RootNamespace>SpellChecker.Net.Test</RootNamespace>
+ <RunPostBuildEvent>OnBuildSuccess</RunPostBuildEvent>
+ <StartupObject>
+ </StartupObject>
+ <FileUpgradeFlags>
+ </FileUpgradeFlags>
+ <UpgradeBackupLocation>
+ </UpgradeBackupLocation>
+ <OldToolsVersion>2.0</OldToolsVersion>
+ <TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <OutputPath>bin\Debug\</OutputPath>
+ <AllowUnsafeBlocks>false</AllowUnsafeBlocks>
+ <BaseAddress>285212672</BaseAddress>
+ <CheckForOverflowUnderflow>false</CheckForOverflowUnderflow>
+ <ConfigurationOverrideFile>
+ </ConfigurationOverrideFile>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <DocumentationFile>
+ </DocumentationFile>
+ <DebugSymbols>true</DebugSymbols>
+ <FileAlignment>4096</FileAlignment>
+ <NoStdLib>false</NoStdLib>
+ <NoWarn>
+ </NoWarn>
+ <Optimize>false</Optimize>
+ <RegisterForComInterop>false</RegisterForComInterop>
+ <RemoveIntegerChecks>false</RemoveIntegerChecks>
+ <TreatWarningsAsErrors>false</TreatWarningsAsErrors>
+ <WarningLevel>4</WarningLevel>
+ <DebugType>full</DebugType>
+ <ErrorReport>prompt</ErrorReport>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <OutputPath>bin\Release\</OutputPath>
+ <AllowUnsafeBlocks>false</AllowUnsafeBlocks>
+ <BaseAddress>285212672</BaseAddress>
+ <CheckForOverflowUnderflow>false</CheckForOverflowUnderflow>
+ <ConfigurationOverrideFile>
+ </ConfigurationOverrideFile>
+ <DefineConstants>TRACE</DefineConstants>
+ <DocumentationFile>
+ </DocumentationFile>
+ <DebugSymbols>false</DebugSymbols>
+ <FileAlignment>4096</FileAlignment>
+ <NoStdLib>false</NoStdLib>
+ <NoWarn>
+ </NoWarn>
+ <Optimize>true</Optimize>
+ <RegisterForComInterop>false</RegisterForComInterop>
+ <RemoveIntegerChecks>false</RemoveIntegerChecks>
+ <TreatWarningsAsErrors>false</TreatWarningsAsErrors>
+ <WarningLevel>4</WarningLevel>
+ <DebugType>none</DebugType>
+ <ErrorReport>prompt</ErrorReport>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="Lucene.Net, Version=2.9.2.1, Culture=neutral, processorArchitecture=MSIL">
+ <SpecificVersion>False</SpecificVersion>
+ <HintPath>..\..\..\src\Lucene.Net\bin\Release\Lucene.Net.dll</HintPath>
+ </Reference>
+ <Reference Include="nunit.framework">
+ <Name>nunit.framework</Name>
+ <HintPath>D:\DEVS\NUnit\bin\nunit.framework.dll</HintPath>
+ <AssemblyFolderKey>hklm\dn\nunit.framework</AssemblyFolderKey>
+ </Reference>
+ <Reference Include="SpellChecker.Net, Version=2.0.0.2, Culture=neutral, processorArchitecture=MSIL">
+ <SpecificVersion>False</SpecificVersion>
+ <HintPath>..\SpellChecker.Net\bin\Release\SpellChecker.Net.dll</HintPath>
+ </Reference>
+ <Reference Include="System">
+ <Name>System</Name>
+ </Reference>
+ <Reference Include="System.Data">
+ <Name>System.Data</Name>
+ </Reference>
+ <Reference Include="System.XML">
+ <Name>System.XML</Name>
+ </Reference>
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="AssemblyInfo.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Test\TestJaroWinklerDistance.cs" />
+ <Compile Include="Test\TestLevenshteinDistance.cs" />
+ <Compile Include="Test\TestLuceneDictionary.cs" />
+ <Compile Include="Test\TestNGramDistance.cs" />
+ <Compile Include="Test\TestPlainTextDictionary.cs" />
+ <Compile Include="Test\TestSpellChecker.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ <Compile Include="Util\English.cs">
+ <SubType>Code</SubType>
+ </Compile>
+ </ItemGroup>
+ <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
+ <PropertyGroup>
+ <PreBuildEvent>
+ </PreBuildEvent>
+ <PostBuildEvent>
+ </PostBuildEvent>
+ </PropertyGroup>
+</Project>
\ No newline at end of file
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.sln
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.sln?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.sln (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/SpellChecker.Net.Test.sln Sun May 30 14:20:28 2010
@@ -1,24 +1,22 @@
-Microsoft Visual Studio Solution File, Format Version 8.00
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SpellChecker.Net.Test-2.0.0", "SpellChecker.Net.Test-2.0.0.csproj", "{4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}"
- ProjectSection(ProjectDependencies) = postProject
- EndProjectSection
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual C# Express 2008
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SpellChecker.Net.Test", "SpellChecker.Net.Test.csproj", "{4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}"
EndProject
Global
- GlobalSection(DPCodeReviewSolutionGUID) = preSolution
- DPCodeReviewSolutionGUID = {00000000-0000-0000-0000-000000000000}
- EndGlobalSection
- GlobalSection(SolutionConfiguration) = preSolution
- Debug = Debug
- Release = Release
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
EndGlobalSection
- GlobalSection(ProjectConfiguration) = postSolution
- {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Debug.ActiveCfg = Debug|.NET
- {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Debug.Build.0 = Debug|.NET
- {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Release.ActiveCfg = Release|.NET
- {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Release.Build.0 = Release|.NET
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {4DCB81AA-ECC1-4B3D-A0C9-28E54F5B125C}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
EndGlobalSection
- GlobalSection(ExtensibilityAddIns) = postSolution
+ GlobalSection(DPCodeReviewSolutionGUID) = preSolution
+ DPCodeReviewSolutionGUID = {00000000-0000-0000-0000-000000000000}
EndGlobalSection
EndGlobal
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestJaroWinklerDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestJaroWinklerDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestJaroWinklerDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestJaroWinklerDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using SpellChecker.Net.Search.Spell;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+ [TestFixture]
+ public class TestJaroWinklerDistance
+ {
+ private StringDistance sd = new JaroWinklerDistance();
+
+ [Test]
+ public void TestGetDistance()
+ {
+ float d = sd.GetDistance("al", "al");
+ Assert.IsTrue(d == 1.0f);
+ d = sd.GetDistance("martha", "marhta");
+ Assert.IsTrue(d > 0.961 && d < 0.962);
+ d = sd.GetDistance("jones", "johnson");
+ Assert.IsTrue(d > 0.832 && d < 0.833);
+ d = sd.GetDistance("abcvwxyz", "cabvwxyz");
+ Assert.IsTrue(d > 0.958 && d < 0.959);
+ d = sd.GetDistance("dwayne", "duane");
+ Assert.IsTrue(d > 0.84 && d < 0.841);
+ d = sd.GetDistance("dixon", "dicksonx");
+ Assert.IsTrue(d > 0.813 && d < 0.814);
+ d = sd.GetDistance("fvie", "ten");
+ Assert.IsTrue(d == 0f);
+ float d1 = sd.GetDistance("zac ephron", "zac efron");
+ float d2 = sd.GetDistance("zac ephron", "kai ephron");
+ Assert.IsTrue(d1 > d2);
+ d1 = sd.GetDistance("brittney spears", "britney spears");
+ d2 = sd.GetDistance("brittney spears", "brittney startzman");
+ Assert.IsTrue(d1 > d2);
+ }
+ }
+}
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLevenshteinDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestLevenshteinDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLevenshteinDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLevenshteinDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+using SpellChecker.Net.Search.Spell;
+using NUnit.Framework;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+    /// <summary>
+    /// NUnit fixture that checks the scores returned by
+    /// <c>GetDistance</c> on a <c>LevenshteinDistance</c> instance.
+    /// </summary>
+    /// <remarks>
+    /// NOTE(review): the expected values below are consistent with
+    /// 1 - (edit distance / length of the longer string); confirm against
+    /// the LevenshteinDistance implementation.
+    /// </remarks>
+    [TestFixture]
+    public class TestLevenshteinDistance
+    {
+        private StringDistance sd = new LevenshteinDistance();
+
+        /// <summary>
+        /// Verifies scores for a fixed set of word pairs, each within a
+        /// tolerance of 0.001, plus the relative ordering of two candidates.
+        /// </summary>
+        [Test]
+        public void TestGetDistance()
+        {
+            // NUnit's AreEqual takes (expected, actual, delta); keeping that
+            // order makes failure messages report the right value as expected.
+            float d = sd.GetDistance("al", "al");
+            Assert.AreEqual(1.0f, d, 0.001);
+            d = sd.GetDistance("martha", "marhta");
+            Assert.AreEqual(0.6666, d, 0.001);
+            d = sd.GetDistance("jones", "johnson");
+            Assert.AreEqual(0.4285, d, 0.001);
+            d = sd.GetDistance("abcvwxyz", "cabvwxyz");
+            Assert.AreEqual(0.75, d, 0.001);
+            d = sd.GetDistance("dwayne", "duane");
+            Assert.AreEqual(0.666, d, 0.001);
+            d = sd.GetDistance("dixon", "dicksonx");
+            Assert.AreEqual(0.5, d, 0.001);
+            d = sd.GetDistance("six", "ten");
+            Assert.AreEqual(0.0f, d, 0.001);
+
+            // Both candidates are expected to score the same against "zac ephron".
+            float d1 = sd.GetDistance("zac ephron", "zac efron");
+            float d2 = sd.GetDistance("zac ephron", "kai ephron");
+            Assert.AreEqual(d1, d2, 0.001);
+
+            // The first candidate must score strictly higher than the second.
+            d1 = sd.GetDistance("brittney spears", "britney spears");
+            d2 = sd.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(d1 > d2, "\"britney spears\" should score higher than \"brittney startzman\"");
+        }
+
+        /// <summary>
+        /// Verifies that comparing an empty string with a non-empty one
+        /// yields a score of 0 (within 0.001).
+        /// </summary>
+        [Test]
+        public void TestEmpty()
+        {
+            float d = sd.GetDistance("", "al");
+            Assert.AreEqual(0.0f, d, 0.001);
+        }
+    }
+}