You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by pn...@apache.org on 2011/11/25 10:03:32 UTC
[Lucene.Net] svn commit: r1206083 - in
/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk:
src/contrib/SpellChecker/ src/contrib/SpellChecker/Spell/
test/contrib/SpellChecker/Test/
Author: pnasser
Date: Fri Nov 25 09:03:28 2011
New Revision: 1206083
URL: http://svn.apache.org/viewvc?rev=1206083&view=rev
Log:
Contrib.SpellChecker file and unit tests - all pass
Added:
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/IDictionary.cs
Removed:
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/Dictionary.cs
Modified:
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Contrib.SpellChecker.csproj
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/JaroWinklerDistance.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/LuceneDictionary.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/PlainTextDictionary.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SpellChecker.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/StringDistance.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SuggestWordQueue.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/TRStringDistance.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestJaroWinklerDistance.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLevenshteinDistance.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLuceneDictionary.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestNGramDistance.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestPlainTextDictionary.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestSpellChecker.cs
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Contrib.SpellChecker.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Contrib.SpellChecker.csproj?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Contrib.SpellChecker.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Contrib.SpellChecker.csproj Fri Nov 25 09:03:28 2011
@@ -19,7 +19,6 @@
under the License.
-->
-
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<ProjectType>Local</ProjectType>
@@ -108,7 +107,7 @@
<Compile Include="AssemblyInfo.cs">
<SubType>Code</SubType>
</Compile>
- <Compile Include="Spell\Dictionary.cs">
+ <Compile Include="Spell\IDictionary.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Spell\JaroWinklerDistance.cs" />
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/IDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/IDictionary.cs?rev=1206083&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/IDictionary.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/IDictionary.cs Fri Nov 25 09:03:28 2011
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace SpellChecker.Net.Search.Spell
+{
+
+ /// <summary> A simple interface representing a Dictionary</summary>
+ public interface IDictionary
+ {
+ /// <summary> Returns all the words present in the dictionary</summary>
+ /// <returns> Iterator
+ /// </returns>
+ System.Collections.IEnumerator GetWordsIterator();
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/JaroWinklerDistance.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/JaroWinklerDistance.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/JaroWinklerDistance.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/JaroWinklerDistance.cs Fri Nov 25 09:03:28 2011
@@ -15,19 +15,22 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
-using System.Text;
+
+using System.Linq;
namespace SpellChecker.Net.Search.Spell
{
+
+ using System;
+
public class JaroWinklerDistance : StringDistance
{
private float threshold = 0.7f;
- private int[] Matches(String s1, String s2)
+ private static int[] Matches(String s1, String s2)
{
String Max, Min;
+
if (s1.Length > s2.Length)
{
Max = s1;
@@ -38,29 +41,34 @@ namespace SpellChecker.Net.Search.Spell
Max = s2;
Min = s1;
}
- int range = Math.Max(Max.Length / 2 - 1, 0);
- int[] matchIndexes = new int[Min.Length];
- for (int i = 0; i < matchIndexes.Length; i++)
+
+ var range = Math.Max(Max.Length / 2 - 1, 0);
+ var matchIndexes = new int[Min.Length];
+
+ for (var i = 0; i < matchIndexes.Length; i++)
matchIndexes[i] = -1;
- bool[] matchFlags = new bool[Max.Length];
- int matches = 0;
- for (int mi = 0; mi < Min.Length; mi++)
- {
- char c1 = Min[mi];
- for (int xi = Math.Max(mi - range, 0), xn = Math.Min(mi + range + 1, Max
- .Length); xi < xn; xi++)
- {
- if (!matchFlags[xi] && c1 == Max[xi])
- {
- matchIndexes[mi] = xi;
- matchFlags[xi] = true;
- matches++;
- break;
- }
+
+ var matchFlags = new bool[Max.Length];
+ var matches = 0;
+
+ for (var mi = 0; mi < Min.Length; mi++)
+ {
+ var c1 = Min[mi];
+ for (int xi = Math.Max(mi - range, 0),
+ xn = Math.Min(mi + range + 1, Max.Length); xi < xn; xi++)
+ {
+ if (matchFlags[xi] || c1 != Max[xi]) continue;
+
+ matchIndexes[mi] = xi;
+ matchFlags[xi] = true;
+ matches++;
+ break;
}
}
- char[] ms1 = new char[matches];
- char[] ms2 = new char[matches];
+
+ var ms1 = new char[matches];
+ var ms2 = new char[matches];
+
for (int i = 0, si = 0; i < Min.Length; i++)
{
if (matchIndexes[i] != -1)
@@ -69,6 +77,7 @@ namespace SpellChecker.Net.Search.Spell
si++;
}
}
+
for (int i = 0, si = 0; i < Max.Length; i++)
{
if (matchFlags[i])
@@ -77,16 +86,11 @@ namespace SpellChecker.Net.Search.Spell
si++;
}
}
- int transpositions = 0;
- for (int mi = 0; mi < ms1.Length; mi++)
- {
- if (ms1[mi] != ms2[mi])
- {
- transpositions++;
- }
- }
- int prefix = 0;
- for (int mi = 0; mi < Min.Length; mi++)
+
+ var transpositions = ms1.Where((t, mi) => t != ms2[mi]).Count();
+
+ var prefix = 0;
+ for (var mi = 0; mi < Min.Length; mi++)
{
if (s1[mi] == s2[mi])
{
@@ -97,25 +101,25 @@ namespace SpellChecker.Net.Search.Spell
break;
}
}
+
return new int[] { matches, transpositions / 2, prefix, Max.Length };
}
public float GetDistance(String s1, String s2)
{
- int[] mtp = Matches(s1, s2);
- float m = (float)mtp[0];
+ var mtp = Matches(s1, s2);
+ var m = (float)mtp[0];
+
if (m == 0)
- {
return 0f;
- }
+
float j = ((m / s1.Length + m / s2.Length + (m - mtp[1]) / m)) / 3;
- float jw = j < GetThreshold() ? j : j + Math.Min(0.1f, 1f / mtp[3]) * mtp[2]
- * (1 - j);
+ float jw = j < GetThreshold() ? j : j + Math.Min(0.1f, 1f / mtp[3]) * mtp[2] * (1 - j);
return jw;
}
/// <summary>
- ///Sets the threshold used to deterMine when Winkler bonus should be used.
+ /// Sets the threshold used to determine when Winkler bonus should be used.
/// Set to a negative value to get the Jaro distance.
/// </summary>
/// <param name="threshold">the new value of the threshold</param>
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/LuceneDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/LuceneDictionary.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/LuceneDictionary.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/LuceneDictionary.cs Fri Nov 25 09:03:28 2011
@@ -15,17 +15,17 @@
* limitations under the License.
*/
-using System;
-using IndexReader = Lucene.Net.Index.IndexReader;
-using TermEnum = Lucene.Net.Index.TermEnum;
-using Term = Lucene.Net.Index.Term;
-
namespace SpellChecker.Net.Search.Spell
{
- /// <summary> Lucene Dictionary
- ///
+ using System;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using TermEnum = Lucene.Net.Index.TermEnum;
+ using Term = Lucene.Net.Index.Term;
+
+ /// <summary>
+ /// Lucene Dictionary
/// </summary>
- public class LuceneDictionary : Dictionary
+ public class LuceneDictionary : IDictionary
{
internal IndexReader reader;
internal System.String field;
@@ -49,11 +49,11 @@ namespace SpellChecker.Net.Search.Spell
internal sealed class LuceneIterator : System.Collections.IEnumerator
{
- private TermEnum termEnum;
+ private readonly TermEnum termEnum;
private Term actualTerm;
private bool hasNextCalled;
- private LuceneDictionary enclosingInstance;
+ private readonly LuceneDictionary enclosingInstance;
public LuceneIterator(LuceneDictionary enclosingInstance)
{
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/PlainTextDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/PlainTextDictionary.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/PlainTextDictionary.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/PlainTextDictionary.cs Fri Nov 25 09:03:28 2011
@@ -30,7 +30,7 @@ namespace SpellChecker.Net.Search.Spell
/// </summary>
/// <author> Nicolas Maisonneuve
/// </author>
- public class PlainTextDictionary : Dictionary
+ public class PlainTextDictionary : IDictionary
{
virtual public System.Collections.IEnumerator GetWordsIterator()
{
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SpellChecker.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SpellChecker.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SpellChecker.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SpellChecker.cs Fri Nov 25 09:03:28 2011
@@ -15,27 +15,27 @@
* limitations under the License.
*/
-using System;
-
-using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
-using Document = Lucene.Net.Documents.Document;
-using Field = Lucene.Net.Documents.Field;
-using IndexReader = Lucene.Net.Index.IndexReader;
-using IndexWriter = Lucene.Net.Index.IndexWriter;
-using Term = Lucene.Net.Index.Term;
-using BooleanClause = Lucene.Net.Search.BooleanClause;
-using BooleanQuery = Lucene.Net.Search.BooleanQuery;
-using Hits = Lucene.Net.Search.Hits;
-using IndexSearcher = Lucene.Net.Search.IndexSearcher;
-using Query = Lucene.Net.Search.Query;
-using TermQuery = Lucene.Net.Search.TermQuery;
-using Directory = Lucene.Net.Store.Directory;
-using SpellChecker.Net.Search.Spell;
-using Lucene.Net.Store;
-using Lucene.Net.Search;
namespace SpellChecker.Net.Search.Spell
{
+ using System;
+
+ using Lucene.Net.Search;
+ using Lucene.Net.Store;
+ using BooleanClause = Lucene.Net.Search.BooleanClause;
+ using BooleanQuery = Lucene.Net.Search.BooleanQuery;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Lucene.Net.Documents.Document;
+ using Field = Lucene.Net.Documents.Field;
+ using IndexReader = Lucene.Net.Index.IndexReader;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using IndexWriter = Lucene.Net.Index.IndexWriter;
+ using Query = Lucene.Net.Search.Query;
+ using Term = Lucene.Net.Index.Term;
+ using TermQuery = Lucene.Net.Search.TermQuery;
+ using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
+
+
/// <summary> <p>
/// Spell Checker class (Main class) <br/>
/// (initially inspired by the David Spencer code).
@@ -67,17 +67,16 @@ namespace SpellChecker.Net.Search.Spell
internal Directory spellindex;
/// <summary> Boost value for start and end grams</summary>
- private float bStart = 2.0f;
- private float bEnd = 1.0f;
+ private const float bStart = 2.0f;
+ private const float bEnd = 1.0f;
- //private IndexReader reader;
// don't use this searcher directly - see #swapSearcher()
private IndexSearcher searcher;
/// <summary>
/// this locks all modifications to the current searcher.
/// </summary>
- private static System.Object searcherLock = new System.Object();
+ private static readonly System.Object searcherLock = new System.Object();
/*
* this lock synchronizes all possible modifications to the
@@ -85,7 +84,7 @@ namespace SpellChecker.Net.Search.Spell
* the same index concurrently. Note: Do not acquire the searcher lock
* before acquiring this lock!
*/
- private static System.Object modifyCurrentIndexLock = new System.Object();
+ private static readonly System.Object modifyCurrentIndexLock = new System.Object();
private volatile bool closed = false;
internal float minScore = 0.5f; //LUCENENET-359 Spellchecker accuracy gets overwritten
@@ -96,11 +95,11 @@ namespace SpellChecker.Net.Search.Spell
/// Use the given directory as a spell checker index. The directory
/// is created if it doesn't exist yet.
/// </summary>
- /// <param name="gramIndex">the spell index directory</param>
+ /// <param name="spellIndex">the spell index directory</param>
/// <param name="sd">the <see cref="StringDistance"/> measurement to use </param>
- public SpellChecker(Directory gramIndex, StringDistance sd)
+ public SpellChecker(Directory spellIndex, StringDistance sd)
{
- this.SetSpellIndex(gramIndex);
+ this.SetSpellIndex(spellIndex);
this.setStringDistance(sd);
}
@@ -109,9 +108,9 @@ namespace SpellChecker.Net.Search.Spell
/// <see cref="LevenshteinDistance"/> as the default <see cref="StringDistance"/>. The
/// directory is created if it doesn't exist yet.
/// </summary>
- /// <param name="gramIndex">the spell index directory</param>
- public SpellChecker(Directory gramIndex)
- : this(gramIndex, new LevenshteinDistance())
+ /// <param name="spellIndex">the spell index directory</param>
+ public SpellChecker(Directory spellIndex)
+ : this(spellIndex, new LevenshteinDistance())
{ }
/// <summary>
@@ -131,7 +130,7 @@ namespace SpellChecker.Net.Search.Spell
EnsureOpen();
if (!IndexReader.IndexExists(spellIndexDir))
{
- IndexWriter writer = new IndexWriter(spellIndexDir, null, true,
+ var writer = new IndexWriter(spellIndexDir, null, true,
IndexWriter.MaxFieldLength.UNLIMITED);
writer.Close();
}
@@ -390,7 +389,7 @@ namespace SpellChecker.Net.Search.Spell
/// <param name="ramMB">the max amount or memory in MB to use</param>
/// <throws> IOException </throws>
/// <throws>AlreadyClosedException if the Spellchecker is already closed</throws>
- public virtual void IndexDictionary(Dictionary dict, int mergeFactor, int ramMB)
+ public virtual void IndexDictionary(IDictionary dict, int mergeFactor, int ramMB)
{
lock (modifyCurrentIndexLock)
{
@@ -431,10 +430,10 @@ namespace SpellChecker.Net.Search.Spell
}
/// <summary>
- /// Indexes the data from the given <see cref="Dictionary"/>.
+ /// Indexes the data from the given <see cref="IDictionary"/>.
/// </summary>
/// <param name="dict">dict the dictionary to index</param>
- public void IndexDictionary(Dictionary dict)
+ public void IndexDictionary(IDictionary dict)
{
IndexDictionary(dict, 300, 10);
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/StringDistance.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/StringDistance.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/StringDistance.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/StringDistance.cs Fri Nov 25 09:03:28 2011
@@ -15,12 +15,11 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
-using System.Text;
namespace SpellChecker.Net.Search.Spell
{
+ using System;
+
/// <summary>
/// Interface for string distances.
/// </summary>
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SuggestWordQueue.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SuggestWordQueue.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SuggestWordQueue.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/SuggestWordQueue.cs Fri Nov 25 09:03:28 2011
@@ -15,13 +15,11 @@
* limitations under the License.
*/
-using System;
-using PriorityQueue = Lucene.Net.Util.PriorityQueue;
namespace SpellChecker.Net.Search.Spell
{
-
-
+ using PriorityQueue = Lucene.Net.Util.PriorityQueue<SuggestWord>;
+
sealed class SuggestWordQueue : PriorityQueue
{
@@ -30,11 +28,9 @@ namespace SpellChecker.Net.Search.Spell
Initialize(size);
}
- override public bool LessThan(System.Object a, System.Object b)
+ override public bool LessThan(SuggestWord a, SuggestWord b)
{
- SuggestWord wa = (SuggestWord) a;
- SuggestWord wb = (SuggestWord) b;
- int val = wa.CompareTo(wb);
+ var val = a.CompareTo(b);
return val < 0;
}
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/TRStringDistance.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/TRStringDistance.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/TRStringDistance.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/SpellChecker/Spell/TRStringDistance.cs Fri Nov 25 09:03:28 2011
@@ -15,8 +15,6 @@
* limitations under the License.
*/
-using System;
-using SpellChecker.Net.Search.Spell;
namespace SpellChecker.Net.Search.Spell
{
@@ -46,8 +44,7 @@ namespace SpellChecker.Net.Search.Spell
public int GetDistance(System.String other)
{
int[][] d; // matrix
- int cost; // cost
-
+
// Step 1
char[] ta = other.ToCharArray();
int m = ta.Length;
@@ -85,19 +82,8 @@ namespace SpellChecker.Net.Search.Spell
char t_j = ta[j - 1];
// Step 5
-
- if (s_i == t_j)
- {
- // same
- cost = 0;
- }
- else
- {
- // not a match
- cost = 1;
-
- // Step 6
- }
+
+ int cost = s_i == t_j ? 0 : 1;
d[i][j] = Min3(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
}
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestJaroWinklerDistance.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestJaroWinklerDistance.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestJaroWinklerDistance.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestJaroWinklerDistance.cs Fri Nov 25 09:03:28 2011
@@ -15,10 +15,6 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
-using System.Text;
-
using NUnit.Framework;
using SpellChecker.Net.Search.Spell;
@@ -28,7 +24,7 @@ namespace SpellChecker.Net.Test.Search.S
[TestFixture]
public class TestJaroWinklerDistance
{
- private StringDistance sd = new JaroWinklerDistance();
+ private readonly StringDistance sd = new JaroWinklerDistance();
[Test]
public void TestGetDistance()
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLevenshteinDistance.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLevenshteinDistance.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLevenshteinDistance.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLevenshteinDistance.cs Fri Nov 25 09:03:28 2011
@@ -15,9 +15,6 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
-using System.Text;
using SpellChecker.Net.Search.Spell;
using NUnit.Framework;
@@ -26,7 +23,7 @@ namespace SpellChecker.Net.Test.Search.S
[TestFixture]
public class TestLevenshteinDistance
{
- private StringDistance sd = new LevenshteinDistance();
+ private readonly StringDistance sd = new LevenshteinDistance();
[Test]
public void TestGetDistance()
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLuceneDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLuceneDictionary.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLuceneDictionary.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestLuceneDictionary.cs Fri Nov 25 09:03:28 2011
@@ -17,9 +17,6 @@
using System;
using System.Collections;
-using System.Collections.Generic;
-using System.Text;
-
using NUnit.Framework;
using Lucene.Net.Store;
@@ -34,9 +31,9 @@ namespace SpellChecker.Net.Test.Search.S
public class TestLuceneDictionary
{
- private Directory store = new RAMDirectory();
+ private readonly Directory store = new RAMDirectory();
- private IndexReader indexReader = null;
+ private IndexReader indexReader;
private LuceneDictionary ld;
private IEnumerator it;
@@ -45,11 +42,9 @@ namespace SpellChecker.Net.Test.Search.S
public void SetUp()
{
- IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);
-
- Document doc;
+ var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
- doc = new Document();
+ var doc = new Document();
doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
writer.AddDocument(doc);
@@ -208,7 +203,7 @@ namespace SpellChecker.Net.Test.Search.S
[Test]
public void TestSpellchecker()
{
- SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
+ var sc = new Net.Search.Spell.SpellChecker(new RAMDirectory());
indexReader = IndexReader.Open(store);
sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));
String[] suggestions = sc.SuggestSimilar("Tam", 1);
@@ -221,22 +216,23 @@ namespace SpellChecker.Net.Test.Search.S
}
#region .NET
- void AssertTrue(string s, bool b)
+
+ static void AssertTrue(string s, bool b)
{
Assert.IsTrue(b, s);
}
- void AssertFalse(string s, bool b)
+ static void AssertFalse(string s, bool b)
{
Assert.IsFalse(b, s);
}
- void AssertEquals(int i, int j)
+ static void AssertEquals(int i, int j)
{
Assert.AreEqual(i, j);
}
- void AssertEquals(string i, string j)
+ static void AssertEquals(string i, string j)
{
Assert.AreEqual(i, j);
}
@@ -266,7 +262,6 @@ namespace System.Runtime.CompilerService
[AttributeUsage(AttributeTargets.Method)]
public sealed class ExtensionAttribute : Attribute
{
- public ExtensionAttribute() { }
}
}
#endregion
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestNGramDistance.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestNGramDistance.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestNGramDistance.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestNGramDistance.cs Fri Nov 25 09:03:28 2011
@@ -15,10 +15,6 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
-using System.Text;
-
using NUnit.Framework;
using SpellChecker.Net.Search.Spell;
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestPlainTextDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestPlainTextDictionary.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestPlainTextDictionary.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestPlainTextDictionary.cs Fri Nov 25 09:03:28 2011
@@ -17,7 +17,6 @@
using System;
using System.IO;
-using System.Collections.Generic;
using System.Text;
using NUnit.Framework;
@@ -35,11 +34,11 @@ namespace SpellChecker.Net.Test.Search.S
public void TestBuild()
{
- String LF = System.Environment.NewLine;
- String input = "oneword" + LF + "twoword" + LF + "threeword";
- PlainTextDictionary ptd = new PlainTextDictionary( new MemoryStream( System.Text.Encoding.UTF8.GetBytes(input)) );
- RAMDirectory ramDir = new RAMDirectory();
- SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
+ var LF = Environment.NewLine;
+ var input = "oneword" + LF + "twoword" + LF + "threeword";
+ var ptd = new PlainTextDictionary( new MemoryStream( Encoding.UTF8.GetBytes(input)) );
+ var ramDir = new RAMDirectory();
+ var spellChecker = new Net.Search.Spell.SpellChecker(ramDir);
spellChecker.IndexDictionary(ptd);
String[] similar = spellChecker.SuggestSimilar("treeword", 2);
Assert.AreEqual(2, similar.Length);
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestSpellChecker.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestSpellChecker.cs?rev=1206083&r1=1206082&r2=1206083&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestSpellChecker.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/SpellChecker/Test/TestSpellChecker.cs Fri Nov 25 09:03:28 2011
@@ -14,9 +14,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
using System;
-
+using System.Collections.Concurrent;
using NUnit.Framework;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
@@ -36,32 +35,28 @@ using Lucene.Net.Search;
namespace SpellChecker.Net.Test.Search.Spell
{
-
-
- /// <summary> Test case
- ///
+ /// <summary>
+ /// Test case
/// </summary>
- /// <author> Nicolas Maisonneuve
- /// </author>
+ /// <author>Nicolas Maisonneuve</author>
[TestFixture]
public class TestSpellChecker
{
private SpellCheckerMock spellChecker;
private Directory userindex, spellindex;
- public ArrayList searchers;
- private Random random = new Random();
-
+ private readonly Random random = new Random();
+ public ConcurrentQueue<IndexSearcher> searchers;
[SetUp]
public virtual void SetUp()
{
//create a user index
userindex = new RAMDirectory();
- IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+ var writer = new IndexWriter(userindex, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
- for (int i = 0; i < 1000; i++)
+ for (var i = 0; i < 1000; i++)
{
- Document doc = new Document();
+ var doc = new Document();
doc.Add(new Field("field1", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
doc.Add(new Field("field2", English.IntToEnglish(i + 1), Field.Store.YES, Field.Index.ANALYZED)); // + word thousand
writer.AddDocument(doc);
@@ -70,7 +65,7 @@ namespace SpellChecker.Net.Test.Search.S
// create the spellChecker
spellindex = new RAMDirectory();
- searchers = ArrayList.Synchronized(new ArrayList());
+ searchers = new ConcurrentQueue<IndexSearcher>();
spellChecker = new SpellCheckerMock(spellindex, this);
}
@@ -112,6 +107,7 @@ namespace SpellChecker.Net.Test.Search.S
Assert.Fail();
}
}
+
private void CheckCommonSuggestions(IndexReader r)
{
String[] similar = spellChecker.SuggestSimilar("fvie", 2);
@@ -190,7 +186,7 @@ namespace SpellChecker.Net.Test.Search.S
Assert.AreEqual(similar[1], "one");
try
{
- similar = spellChecker.SuggestSimilar("tousand", 10, r, null, false);
+ spellChecker.SuggestSimilar("tousand", 10, r, null, false);
}
catch (NullReferenceException e)
{
@@ -218,16 +214,14 @@ namespace SpellChecker.Net.Test.Search.S
{
long time = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
spellChecker.IndexDictionary(new LuceneDictionary(r, field));
- time = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - time;
- //System.out.println("time to build " + field + ": " + time);
}
private int Numdoc()
{
- IndexReader rs = IndexReader.Open(spellindex);
+ var rs = IndexReader.Open(spellindex);
int num = rs.NumDocs();
Assert.IsTrue(num != 0);
- //System.out.println("num docs: " + num);
+
rs.Close();
return num;
}
@@ -235,18 +229,23 @@ namespace SpellChecker.Net.Test.Search.S
[Test]
public void TestClose()
{
- IndexReader r = IndexReader.Open(userindex, true);
+ var r = IndexReader.Open(userindex, true);
spellChecker.ClearIndex();
- String field = "field1";
+ const string field = "field1";
+
Addwords(r, "field1");
int num_field1 = this.Numdoc();
+
Addwords(r, "field2");
int num_field2 = this.Numdoc();
+
Assert.AreEqual(num_field2, num_field1 + 1);
+
CheckCommonSuggestions(r);
AssertLastSearcherOpen(4);
spellChecker.Close();
AssertSearchersClosed();
+
try
{
spellChecker.Close();
@@ -352,7 +351,8 @@ namespace SpellChecker.Net.Test.Search.S
AssertSearchersClosed();
}
- private void joinAll(SpellCheckWorker[] workers, long timeout)
+
+ private static void joinAll(SpellCheckWorker[] workers, long timeout)
{
for (int j = 0; j < workers.Length; j++)
{
@@ -470,8 +470,8 @@ namespace SpellChecker.Net.Test.Search.S
public class SpellCheckerMock : SpellChecker.Net.Search.Spell.SpellChecker
{
- private TestSpellChecker enclosingInstance;
- ArrayList searchers = ArrayList.Synchronized(new ArrayList()); // <--New !!!!!!!
+ private readonly TestSpellChecker enclosingInstance;
+ private readonly ConcurrentQueue<IndexSearcher> searchers = new ConcurrentQueue<IndexSearcher>();
public SpellCheckerMock(Directory spellIndex, TestSpellChecker inst)
: base(spellIndex)
{
@@ -487,7 +487,7 @@ namespace SpellChecker.Net.Test.Search.S
public override IndexSearcher CreateSearcher(Directory dir)
{
IndexSearcher searcher = base.CreateSearcher(dir);
- searchers.Add(searcher);
+ searchers.Enqueue(searcher);
return searcher;
}
}