You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2010/05/30 16:20:28 UTC
svn commit: r949519 [2/2] - in
/lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net: SpellChecker.Net/
SpellChecker.Net/Spell/ Test/ Test/Test/
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestLuceneDictionary.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using Lucene.Net.Store;
+using Lucene.Net.Index;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using SpellChecker.Net.Search.Spell;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+ [TestFixture]
+ public class TestLuceneDictionary
+ {
+
+ private Directory store = new RAMDirectory();
+
+ private IndexReader indexReader = null;
+
+ private LuceneDictionary ld;
+ private IEnumerator it;
+
+        /// <summary>
+        /// Rebuilds the shared index in <c>store</c> before each test with five
+        /// single-field documents: "aaa"/"foo" twice, "contents"/"Tom",
+        /// "contents"/"Jerry" and "zzz"/"bar".
+        /// </summary>
+        [SetUp]
+        public void SetUp()
+        {
+            // Field name / field text pairs, one document each, in insertion order.
+            string[][] fixtures = new string[][]
+            {
+                new string[] { "aaa", "foo" },
+                new string[] { "aaa", "foo" },
+                new string[] { "contents", "Tom" },
+                new string[] { "contents", "Jerry" },
+                new string[] { "zzz", "bar" }
+            };
+
+            IndexWriter indexWriter = new IndexWriter(store, new WhitespaceAnalyzer(), true);
+
+            foreach (string[] pair in fixtures)
+            {
+                Document document = new Document();
+                document.Add(new Field(pair[0], pair[1], Field.Store.YES, Field.Index.ANALYZED));
+                indexWriter.AddDocument(document);
+            }
+
+            indexWriter.Optimize();
+            indexWriter.Close();
+        }
+
+        /// <summary>
+        /// A dictionary built over a field that no document contains must not
+        /// report any word.
+        /// </summary>
+        [Test]
+        public void TestFieldNonExistent()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "nonexistent_field");
+                it = ld.GetWordsIterator();
+
+                bool hasWord = it.HasNext();
+                AssertFalse("More elements than expected", hasWord);
+
+                object word = it.Next();
+                AssertTrue("Nonexistent element is really null", word == null);
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// The "aaa" field holds the same text in two documents; the dictionary
+        /// must report the single word "foo" and then nothing else.
+        /// </summary>
+        [Test]
+        public void TestFieldAaa()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "aaa");
+                it = ld.GetWordsIterator();
+
+                bool hasFirst = it.HasNext();
+                AssertTrue("First element doesn't exist.", hasFirst);
+                object first = it.Next();
+                AssertTrue("First element isn't correct", first.Equals("foo"));
+
+                bool hasSecond = it.HasNext();
+                AssertFalse("More elements than expected", hasSecond);
+                object pastEnd = it.Next();
+                AssertTrue("Nonexistent element is really null", pastEnd == null);
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Checks that the "contents" field yields "Jerry" and then "Tom", and
+        /// that a second iterator over the same field visits exactly two words.
+        /// </summary>
+        [Test]
+        public void TestFieldContents_1()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "contents");
+                it = ld.GetWordsIterator();
+
+                bool hasFirst = it.HasNext();
+                AssertTrue("First element doesn't exist.", hasFirst);
+                object first = it.Next();
+                AssertTrue("First element isn't correct", first.Equals("Jerry"));
+
+                bool hasSecond = it.HasNext();
+                AssertTrue("Second element doesn't exist.", hasSecond);
+                object second = it.Next();
+                AssertTrue("Second element isn't correct", second.Equals("Tom"));
+
+                bool hasThird = it.HasNext();
+                AssertFalse("More elements than expected", hasThird);
+                object pastEnd = it.Next();
+                AssertTrue("Nonexistent element is really null", pastEnd == null);
+
+                // Fresh dictionary and iterator: count how many words it produces.
+                ld = new LuceneDictionary(indexReader, "contents");
+                it = ld.GetWordsIterator();
+
+                int visited = 0;
+                while (it.HasNext())
+                {
+                    it.Next();
+                    visited++;
+                }
+
+                AssertTrue("Number of words incorrect", visited == 2);
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Iterates the "contents" field using Next() only, then checks that the
+        /// exhausted iterator keeps reporting "no more elements" and keeps
+        /// returning null.
+        /// </summary>
+        [Test]
+        public void TestFieldContents_2()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+
+                ld = new LuceneDictionary(indexReader, "contents");
+                it = ld.GetWordsIterator();
+
+                // HasNext() should have no side effects, but the HasNext extension in this
+                // file forwards to IEnumerator.MoveNext(), so the repeated calls below stay
+                // disabled. {{DIGY}} Needs a fix?
+                //AssertTrue("First element isn't where it should be.", it.HasNext());
+                //AssertTrue("First element isn't where it should be.", it.HasNext());
+                //AssertTrue("First element isn't where it should be.", it.HasNext());
+
+                // just iterate through words
+                AssertTrue("First element isn't correct", it.Next().Equals("Jerry"));
+                AssertTrue("Second element isn't correct", it.Next().Equals("Tom"));
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+
+                // HasNext() on an exhausted iterator must keep returning false.
+                // The message previously read "There should be any more elements",
+                // which states the opposite of what is asserted.
+                AssertFalse("There should not be any more elements", it.HasNext());
+                AssertFalse("There should not be any more elements", it.HasNext());
+                AssertFalse("There should not be any more elements", it.HasNext());
+
+                // .. and there are really no more words
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+                AssertTrue("Nonexistent element is really null", it.Next() == null);
+            }
+            finally
+            {
+                if (indexReader != null) { indexReader.Close(); }
+            }
+        }
+
+        /// <summary>
+        /// The "zzz" field holds one document; the dictionary must report the
+        /// single word "bar" and then nothing else.
+        /// </summary>
+        [Test]
+        public void TestFieldZzz()
+        {
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                ld = new LuceneDictionary(indexReader, "zzz");
+                it = ld.GetWordsIterator();
+
+                bool hasFirst = it.HasNext();
+                AssertTrue("First element doesn't exist.", hasFirst);
+                object first = it.Next();
+                AssertTrue("First element isn't correct", first.Equals("bar"));
+
+                bool hasSecond = it.HasNext();
+                AssertFalse("More elements than expected", hasSecond);
+                object pastEnd = it.Next();
+                AssertTrue("Nonexistent element is really null", pastEnd == null);
+            }
+            finally
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Indexes the words of the "contents" field into a spell checker backed by
+        /// a fresh RAMDirectory and checks the single best suggestion for two
+        /// misspellings.
+        /// </summary>
+        [Test]
+        public void TestSpellchecker()
+        {
+            SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
+            try
+            {
+                indexReader = IndexReader.Open(store);
+                sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));
+
+                String[] suggestions = sc.SuggestSimilar("Tam", 1);
+                AssertEquals(1, suggestions.Length);
+                AssertEquals("Tom", suggestions[0]);
+
+                suggestions = sc.SuggestSimilar("Jarry", 1);
+                AssertEquals(1, suggestions.Length);
+                AssertEquals("Jerry", suggestions[0]);
+            }
+            finally
+            {
+                // Close the reader even when an assertion fails, like the other tests in this fixture.
+                if (indexReader != null) { indexReader.Close(); }
+            }
+        }
+
+        #region .NET
+        // Adapters that accept the message-first argument order used by the test
+        // bodies in this fixture and forward to the NUnit Assert methods.
+
+        /// <summary>Fails with message <paramref name="s"/> unless <paramref name="b"/> is true.</summary>
+        private void AssertTrue(string s, bool b)
+        {
+            Assert.IsTrue(b, s);
+        }
+
+        /// <summary>Fails with message <paramref name="s"/> unless <paramref name="b"/> is false.</summary>
+        private void AssertFalse(string s, bool b)
+        {
+            Assert.IsFalse(b, s);
+        }
+
+        /// <summary>Forwards both integers, in the given order, to Assert.AreEqual.</summary>
+        private void AssertEquals(int i, int j)
+        {
+            Assert.AreEqual(i, j);
+        }
+
+        /// <summary>Forwards both strings, in the given order, to Assert.AreEqual.</summary>
+        private void AssertEquals(string i, string j)
+        {
+            Assert.AreEqual(i, j);
+        }
+        #endregion
+ }
+}
+
+#region .NET
+namespace SpellChecker.Net.Test.Search.Spell
+{
+ /// <summary>
+ /// Extension methods that give <see cref="IEnumerator"/> the HasNext()/Next()
+ /// call shape used by the tests in this file.
+ /// </summary>
+ public static class Extensions
+ {
+ /// <summary>
+ /// Calls <c>MoveNext()</c> on the enumerator and returns its result.
+ /// NOTE(review): because it forwards to MoveNext(), every call is a state-changing
+ /// call on the enumerator; it is not a side-effect-free peek.
+ /// </summary>
+ public static bool HasNext(this IEnumerator a)
+ {
+ return a.MoveNext();
+ }
+
+ /// <summary>
+ /// Returns the enumerator's <c>Current</c> value without calling MoveNext().
+ /// </summary>
+ public static object Next(this IEnumerator a)
+ {
+ return a.Current;
+ }
+ }
+}
+
+namespace System.Runtime.CompilerServices
+{
+    /// <summary>
+    /// Local declaration of <c>System.Runtime.CompilerServices.ExtensionAttribute</c>,
+    /// applicable to methods only.
+    /// NOTE(review): presumably declared here so the extension methods in this file
+    /// compile without referencing System.Core - confirm against the project settings.
+    /// </summary>
+    [AttributeUsage(AttributeTargets.Method)]
+    public sealed class ExtensionAttribute : Attribute
+    {
+        public ExtensionAttribute()
+            : base()
+        {
+        }
+    }
+}
+#endregion
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestNGramDistance.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using SpellChecker.Net.Search.Spell;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+ [TestFixture]
+ public class TestNGramDistance
+ {
+        /// <summary>
+        /// Checks NGramDistance with n-gram size 1 against known similarity values
+        /// (1.0 for identical strings, 0.0 for strings sharing nothing).
+        /// </summary>
+        [Test]
+        public void TestGetDistance1()
+        {
+            StringDistance nsd = new NGramDistance(1);
+            float d = nsd.GetDistance("al", "al");
+            Assert.AreEqual(d, 1.0f, 0.001);
+            d = nsd.GetDistance("a", "a");
+            Assert.AreEqual(d, 1.0f, 0.001);
+            d = nsd.GetDistance("b", "a");
+            Assert.AreEqual(d, 0.0f, 0.001);
+            d = nsd.GetDistance("martha", "marhta");
+            Assert.AreEqual(d, 0.6666, 0.001);
+            d = nsd.GetDistance("jones", "johnson");
+            Assert.AreEqual(d, 0.4285, 0.001);
+            d = nsd.GetDistance("natural", "contrary");
+            Assert.AreEqual(d, 0.25, 0.001);
+            d = nsd.GetDistance("abcvwxyz", "cabvwxyz");
+            Assert.AreEqual(d, 0.75, 0.001);
+            d = nsd.GetDistance("dwayne", "duane");
+            Assert.AreEqual(d, 0.666, 0.001);
+            d = nsd.GetDistance("dixon", "dicksonx");
+            Assert.AreEqual(d, 0.5, 0.001);
+            d = nsd.GetDistance("six", "ten");
+            Assert.AreEqual(d, 0, 0.001);
+
+            float d1 = nsd.GetDistance("zac ephron", "zac efron");
+            float d2 = nsd.GetDistance("zac ephron", "kai ephron");
+            Assert.AreEqual(d1, d2, 0.001);
+
+            d1 = nsd.GetDistance("brittney spears", "britney spears");
+            d2 = nsd.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(d1 > d2);
+
+            d1 = nsd.GetDistance("12345678", "12890678");
+            d2 = nsd.GetDistance("12345678", "72385698");
+            // The tolerance was written as 001 (the integer 1), which accepts any two
+            // values within 1.0 of each other; use 0.001 like every other assertion here.
+            Assert.AreEqual(d1, d2, 0.001);
+        }
+
+        /// <summary>
+        /// Checks NGramDistance with n-gram size 2 against known similarity values
+        /// and relative orderings.
+        /// </summary>
+        [Test]
+        public void TestGetDistance2()
+        {
+            StringDistance bigram = new NGramDistance(2);
+
+            float score = bigram.GetDistance("al", "al");
+            Assert.AreEqual(score, 1.0f, 0.001);
+            score = bigram.GetDistance("a", "a");
+            Assert.AreEqual(score, 1.0f, 0.001);
+            score = bigram.GetDistance("b", "a");
+            Assert.AreEqual(score, 0.0f, 0.001);
+            score = bigram.GetDistance("a", "aa");
+            Assert.AreEqual(score, 0.5f, 0.001);
+            score = bigram.GetDistance("martha", "marhta");
+            Assert.AreEqual(score, 0.6666, 0.001);
+            score = bigram.GetDistance("jones", "johnson");
+            Assert.AreEqual(score, 0.4285, 0.001);
+            score = bigram.GetDistance("natural", "contrary");
+            Assert.AreEqual(score, 0.25, 0.001);
+            score = bigram.GetDistance("abcvwxyz", "cabvwxyz");
+            Assert.AreEqual(score, 0.625, 0.001);
+            score = bigram.GetDistance("dwayne", "duane");
+            Assert.AreEqual(score, 0.5833, 0.001);
+            score = bigram.GetDistance("dixon", "dicksonx");
+            Assert.AreEqual(score, 0.5, 0.001);
+            score = bigram.GetDistance("six", "ten");
+            Assert.AreEqual(score, 0, 0.001);
+
+            // Pairwise comparisons: only the relation between the two scores matters.
+            float first = bigram.GetDistance("zac ephron", "zac efron");
+            float second = bigram.GetDistance("zac ephron", "kai ephron");
+            Assert.IsTrue(first > second);
+
+            first = bigram.GetDistance("brittney spears", "britney spears");
+            second = bigram.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(first > second);
+
+            first = bigram.GetDistance("0012345678", "0012890678");
+            second = bigram.GetDistance("0012345678", "0072385698");
+            Assert.AreEqual(first, second, 0.001);
+        }
+
+        /// <summary>
+        /// Checks NGramDistance with n-gram size 3 against known similarity values
+        /// and relative orderings.
+        /// </summary>
+        [Test]
+        public void TestGetDistance3()
+        {
+            StringDistance trigram = new NGramDistance(3);
+
+            float score = trigram.GetDistance("al", "al");
+            Assert.AreEqual(score, 1.0f, 0.001);
+            score = trigram.GetDistance("a", "a");
+            Assert.AreEqual(score, 1.0f, 0.001);
+            score = trigram.GetDistance("b", "a");
+            Assert.AreEqual(score, 0.0f, 0.001);
+            score = trigram.GetDistance("martha", "marhta");
+            Assert.AreEqual(score, 0.7222, 0.001);
+            score = trigram.GetDistance("jones", "johnson");
+            Assert.AreEqual(score, 0.4762, 0.001);
+            score = trigram.GetDistance("natural", "contrary");
+            Assert.AreEqual(score, 0.2083, 0.001);
+            score = trigram.GetDistance("abcvwxyz", "cabvwxyz");
+            Assert.AreEqual(score, 0.5625, 0.001);
+            score = trigram.GetDistance("dwayne", "duane");
+            Assert.AreEqual(score, 0.5277, 0.001);
+            score = trigram.GetDistance("dixon", "dicksonx");
+            Assert.AreEqual(score, 0.4583, 0.001);
+            score = trigram.GetDistance("six", "ten");
+            Assert.AreEqual(score, 0, 0.001);
+
+            // Pairwise comparisons: only the relation between the two scores matters.
+            float first = trigram.GetDistance("zac ephron", "zac efron");
+            float second = trigram.GetDistance("zac ephron", "kai ephron");
+            Assert.IsTrue(first > second);
+
+            first = trigram.GetDistance("brittney spears", "britney spears");
+            second = trigram.GetDistance("brittney spears", "brittney startzman");
+            Assert.IsTrue(first > second);
+
+            first = trigram.GetDistance("0012345678", "0012890678");
+            second = trigram.GetDistance("0012345678", "0072385698");
+            Assert.IsTrue(first < second);
+        }
+
+        /// <summary>
+        /// An empty string compared with a non-empty one must score 0.
+        /// </summary>
+        // The [Test] attribute was missing, so NUnit never discovered this method.
+        [Test]
+        public void TestEmpty()
+        {
+            StringDistance nsd = new NGramDistance(1);
+            float d = nsd.GetDistance("", "al");
+            Assert.AreEqual(d, 0.0f, 0.001);
+        }
+
+ }
+
+
+}
Added: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs?rev=949519&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs (added)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestPlainTextDictionary.cs Sun May 30 14:20:28 2010
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections.Generic;
+using System.Text;
+
+using NUnit.Framework;
+
+using SpellChecker.Net.Search.Spell;
+
+using Lucene.Net.Store;
+
+namespace SpellChecker.Net.Test.Search.Spell
+{
+ [TestFixture]
+ public class TestPlainTextDictionary
+ {
+        /// <summary>
+        /// Builds a spell index from a three-word plain-text dictionary supplied as
+        /// an in-memory UTF-8 stream and checks the two suggestions for "treeword".
+        /// </summary>
+        [Test]
+        public void TestBuild()
+        {
+            String LF = System.Environment.NewLine;
+            String input = "oneword" + LF + "twoword" + LF + "threeword";
+            PlainTextDictionary ptd = new PlainTextDictionary(new MemoryStream(System.Text.Encoding.UTF8.GetBytes(input)));
+            RAMDirectory ramDir = new RAMDirectory();
+            SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
+            spellChecker.IndexDictionary(ptd);
+
+            String[] similar = spellChecker.SuggestSimilar("treeword", 2);
+
+            // NUnit's argument order is (expected, actual); keep it consistent so
+            // failure messages report the right value as "expected".
+            Assert.AreEqual(2, similar.Length);
+            Assert.AreEqual("threeword", similar[0]);
+            Assert.AreEqual("twoword", similar[1]);
+        }
+ }
+}
Modified: lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs?rev=949519&r1=949518&r2=949519&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/SpellChecker.Net/Test/Test/TestSpellChecker.cs Sun May 30 14:20:28 2010
@@ -28,118 +28,469 @@ using Field = Lucene.Net.Documents.Field
using IndexReader = Lucene.Net.Index.IndexReader;
using Directory = Lucene.Net.Store.Directory;
using LuceneDictionary = SpellChecker.Net.Search.Spell.LuceneDictionary;
+using System.Collections;
+using Lucene.Net.Store;
+using System.Threading;
+using SpellChecker.Net.Search.Spell;
+using Lucene.Net.Search;
namespace SpellChecker.Net.Test.Search.Spell
{
-
-
- /// <summary> Test case
- ///
- /// </summary>
- /// <author> Nicolas Maisonneuve
- /// </author>
- [TestFixture]
+
+
+ /// <summary> Test case
+ ///
+ /// </summary>
+ /// <author> Nicolas Maisonneuve
+ /// </author>
+ [TestFixture]
public class TestSpellChecker
- {
- private SpellChecker.Net.Search.Spell.SpellChecker spellChecker;
- private Directory userindex, spellindex;
-
+ {
+ private SpellCheckerMock spellChecker;
+ private Directory userindex, spellindex;
+ public ArrayList searchers;
+ private Random random = new Random();
+
+
[SetUp]
- public virtual void SetUp()
- {
- //create a user index
- userindex = new RAMDirectory();
- IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);
-
- for (int i = 0; i < 1000; i++)
- {
- Document doc = new Document();
- doc.Add(new Field("field1", English.IntToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
- doc.Add(new Field("field2", English.IntToEnglish(i + 1), Field.Store.YES, Field.Index.TOKENIZED)); // + word thousand
- writer.AddDocument(doc);
- }
- writer.Close();
-
- // create the spellChecker
- spellindex = new RAMDirectory();
- spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellindex);
- }
-
- [Test]
- public virtual void TestBuild()
- {
- try
- {
- IndexReader r = IndexReader.Open(userindex);
-
- spellChecker.ClearIndex();
-
- Addwords(r, "field1");
- int num_field1 = this.Numdoc();
-
- Addwords(r, "field2");
- int num_field2 = this.Numdoc();
-
- Assert.AreEqual(num_field2, num_field1 + 1);
-
- // test small word
- System.String[] similar = spellChecker.SuggestSimilar("fvie", 2);
- Assert.AreEqual(1, similar.Length);
- Assert.AreEqual(similar[0], "five");
-
- similar = spellChecker.SuggestSimilar("five", 2);
- Assert.AreEqual(1, similar.Length);
- Assert.AreEqual(similar[0], "nine"); // don't suggest a word for itself
-
- similar = spellChecker.SuggestSimilar("fiv", 2);
- Assert.AreEqual(1, similar.Length);
- Assert.AreEqual(similar[0], "five");
-
- similar = spellChecker.SuggestSimilar("ive", 2);
- Assert.AreEqual(1, similar.Length);
- Assert.AreEqual(similar[0], "five");
-
- similar = spellChecker.SuggestSimilar("fives", 2);
- Assert.AreEqual(1, similar.Length);
- Assert.AreEqual(similar[0], "five");
-
- similar = spellChecker.SuggestSimilar("fie", 2);
- Assert.AreEqual(1, similar.Length);
- Assert.AreEqual(similar[0], "five");
-
- similar = spellChecker.SuggestSimilar("fi", 2);
- Assert.AreEqual(0, similar.Length);
-
- // test restraint to a field
- similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
- Assert.AreEqual(0, similar.Length); // there isn't the term thousand in the field field1
-
- similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
- Assert.AreEqual(1, similar.Length); // there is the term thousand in the field field2
- }
- catch (System.IO.IOException e)
- {
+        // Builds the user index (1000 documents whose two fields hold the English
+        // spelling of i and of i + 1) and a fresh spell checker mock over an empty
+        // RAMDirectory.
+        public virtual void SetUp()
+        {
+            //create a user index
+            userindex = new RAMDirectory();
+            IndexWriter indexWriter = new IndexWriter(userindex, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+
+            int number = 0;
+            while (number < 1000)
+            {
+                Document document = new Document();
+
+                Field first = new Field("field1", English.IntToEnglish(number), Field.Store.YES, Field.Index.ANALYZED);
+                document.Add(first);
+
+                // + word thousand
+                Field second = new Field("field2", English.IntToEnglish(number + 1), Field.Store.YES, Field.Index.ANALYZED);
+                document.Add(second);
+
+                indexWriter.AddDocument(document);
+                number++;
+            }
+            indexWriter.Close();
+
+            // create the spellChecker
+            spellindex = new RAMDirectory();
+            searchers = ArrayList.Synchronized(new ArrayList());
+            spellChecker = new SpellCheckerMock(spellindex, this);
+        }
+
+ // Builds the spell index from "field1" and "field2" of the user index, then runs the
+ // suggestion checks three times: with the spell checker's initial string distance,
+ // with JaroWinklerDistance (accuracy 0.8) and with NGramDistance(2) (accuracy 0.5).
+ [Test]
+ public virtual void TestBuild()
+ {
+ try
+ {
+ IndexReader r = IndexReader.Open(userindex, true);
+
+ spellChecker.ClearIndex();
+
+ Addwords(r, "field1");
+ int num_field1 = this.Numdoc();
+
+ Addwords(r, "field2");
+ int num_field2 = this.Numdoc();
+
+ // Indexing field2 must add exactly one document to the spell index.
+ Assert.AreEqual (num_field2, num_field1 + 1);
+
+ // Four searchers must have been recorded so far, and only the last may be open.
+ AssertLastSearcherOpen(4);
+
+ CheckCommonSuggestions(r);
+ CheckLevenshteinSuggestions(r);
+
+ spellChecker.setStringDistance(new JaroWinklerDistance());
+ spellChecker.SetAccuracy(0.8f);
+ CheckCommonSuggestions(r);
+ CheckJaroWinklerSuggestions();
+
+ spellChecker.setStringDistance(new NGramDistance(2));
+ spellChecker.SetAccuracy(0.5f);
+ CheckCommonSuggestions(r);
+ CheckNGramSuggestions();
+ }
+ catch (System.IO.IOException e)
+ {
 System.Console.Error.WriteLine(e.StackTrace);
- Assert.Fail();
- }
- }
-
- private void Addwords(IndexReader r, System.String field)
- {
- long time = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
- spellChecker.IndexDictionary(new LuceneDictionary(r, field));
- time = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - time;
- //System.out.println("time to build " + field + ": " + time);
- }
-
- private int Numdoc()
- {
- IndexReader rs = IndexReader.Open(spellindex);
- int num = rs.NumDocs();
- Assert.IsTrue(num != 0);
- //System.out.println("num docs: " + num);
- rs.Close();
- return num;
- }
- }
+ Assert.Fail();
+ }
+ }
+        /// <summary>
+        /// Suggestion checks that must hold for every string distance the tests
+        /// configure: near-misses of "five" suggest "five", a word is not suggested
+        /// for itself, and field-restricted lookups respect the field.
+        /// </summary>
+        /// <param name="r">Reader over the user index, used for the field-restricted lookups.</param>
+        private void CheckCommonSuggestions(IndexReader r)
+        {
+            String[] similar = spellChecker.SuggestSimilar("fvie", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual("five", similar[0]);
+
+            similar = spellChecker.SuggestSimilar("five", 2);
+            if (similar.Length > 0)
+            {
+                Assert.False(similar[0].Equals("five")); // don't suggest a word for itself
+            }
+
+            similar = spellChecker.SuggestSimilar("fiv", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual("five", similar[0]);
+
+            similar = spellChecker.SuggestSimilar("fives", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual("five", similar[0]);
+
+            // The length check used to run before the lookup, so it re-tested the
+            // previous result and left this one unchecked before indexing into it.
+            similar = spellChecker.SuggestSimilar("fie", 2);
+            Assert.True(similar.Length > 0);
+            Assert.AreEqual("five", similar[0]);
+
+            // test restraint to a field
+            similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
+            Assert.AreEqual(0, similar.Length); // there isn't the term thousand in the field field1
+
+            similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
+            Assert.AreEqual(1, similar.Length); // there is the term thousand in the field field2
+        }
+
+        /// <summary>
+        /// Exact suggestion lists expected while the spell checker still uses the
+        /// string distance it was constructed with (this is called before any
+        /// setStringDistance call in TestBuild).
+        /// </summary>
+        /// <param name="r">Reader over the user index, used for the field-restricted lookups.</param>
+        private void CheckLevenshteinSuggestions(IndexReader r)
+        {
+            // test small word
+            String[] similar = spellChecker.SuggestSimilar("fvie", 2);
+            Assert.AreEqual(1, similar.Length);
+            Assert.AreEqual("five", similar[0]);
+
+            similar = spellChecker.SuggestSimilar("five", 2);
+            Assert.AreEqual(1, similar.Length);
+            Assert.AreEqual("nine", similar[0]); // don't suggest a word for itself
+
+            similar = spellChecker.SuggestSimilar("fiv", 2);
+            Assert.AreEqual(1, similar.Length);
+            Assert.AreEqual("five", similar[0]);
+
+            similar = spellChecker.SuggestSimilar("ive", 2);
+            Assert.AreEqual(2, similar.Length);
+            Assert.AreEqual("five", similar[0]);
+            Assert.AreEqual("nine", similar[1]);
+
+            similar = spellChecker.SuggestSimilar("fives", 2);
+            Assert.AreEqual(1, similar.Length);
+            Assert.AreEqual("five", similar[0]);
+
+            similar = spellChecker.SuggestSimilar("fie", 2);
+            Assert.AreEqual(2, similar.Length);
+            Assert.AreEqual("five", similar[0]);
+            Assert.AreEqual("nine", similar[1]);
+
+            similar = spellChecker.SuggestSimilar("fi", 2);
+            Assert.AreEqual(1, similar.Length);
+            Assert.AreEqual("five", similar[0]);
+
+            // test restraint to a field
+            similar = spellChecker.SuggestSimilar("tousand", 10, r, "field1", false);
+            Assert.AreEqual(0, similar.Length); // there isn't the term thousand in the field field1
+
+            similar = spellChecker.SuggestSimilar("tousand", 10, r, "field2", false);
+            Assert.AreEqual(1, similar.Length); // there is the term thousand in the field field2
+
+            similar = spellChecker.SuggestSimilar("onety", 2);
+            Assert.AreEqual(2, similar.Length);
+            Assert.AreEqual("ninety", similar[0]);
+            Assert.AreEqual("one", similar[1]);
+
+            // A null field name together with a reader must not blow up.
+            try
+            {
+                similar = spellChecker.SuggestSimilar("tousand", 10, r, null, false);
+            }
+            catch (NullReferenceException)
+            {
+                Assert.Fail("threw an NPE, and it shouldn't have");
+            }
+        }
+
+        /// <summary>
+        /// Expected suggestions for "onety" after TestBuild has switched the spell
+        /// checker to JaroWinklerDistance: "one" first, then "ninety".
+        /// </summary>
+        private void CheckJaroWinklerSuggestions()
+        {
+            String[] similar = spellChecker.SuggestSimilar("onety", 2);
+
+            // NUnit argument order is (expected, actual).
+            Assert.AreEqual(2, similar.Length);
+            Assert.AreEqual("one", similar[0]);
+            Assert.AreEqual("ninety", similar[1]);
+        }
+
+        /// <summary>
+        /// Expected suggestions for "onety" after TestBuild has switched the spell
+        /// checker to NGramDistance(2): "one" first, then "ninety".
+        /// </summary>
+        private void CheckNGramSuggestions()
+        {
+            String[] similar = spellChecker.SuggestSimilar("onety", 2);
+
+            // NUnit argument order is (expected, actual).
+            Assert.AreEqual(2, similar.Length);
+            Assert.AreEqual("one", similar[0]);
+            Assert.AreEqual("ninety", similar[1]);
+        }
+
+        /// <summary>
+        /// Indexes every word of <paramref name="field"/> from the user index into
+        /// the spell checker.
+        /// </summary>
+        /// <param name="r">Reader over the user index.</param>
+        /// <param name="field">Name of the field whose terms are added.</param>
+        private void Addwords(IndexReader r, System.String field)
+        {
+            // The former timing local was computed but never read (its only consumer
+            // was a commented-out print), so it has been removed.
+            spellChecker.IndexDictionary(new LuceneDictionary(r, field));
+        }
+
+        /// <summary>
+        /// Opens the spell index, asserts that it is not empty and returns its
+        /// document count.
+        /// </summary>
+        /// <returns>The value of NumDocs() for the spell index.</returns>
+        private int Numdoc()
+        {
+            IndexReader rs = IndexReader.Open(spellindex);
+            try
+            {
+                int num = rs.NumDocs();
+                Assert.IsTrue(num != 0);
+                return num;
+            }
+            finally
+            {
+                // Close the reader even when the assertion above fails.
+                rs.Close();
+            }
+        }
+
+        /// <summary>
+        /// Verifies that closing the spell checker closes every searcher it created
+        /// and that each later operation on it throws AlreadyClosedException.
+        /// </summary>
+        [Test]
+        public void TestClose()
+        {
+            IndexReader r = IndexReader.Open(userindex, true);
+            try
+            {
+                spellChecker.ClearIndex();
+                String field = "field1";
+                Addwords(r, field);
+                int num_field1 = this.Numdoc();
+                Addwords(r, "field2");
+                int num_field2 = this.Numdoc();
+                Assert.AreEqual(num_field2, num_field1 + 1);
+                CheckCommonSuggestions(r);
+                AssertLastSearcherOpen(4);
+                spellChecker.Close();
+                AssertSearchersClosed();
+
+                // Every operation below must be rejected now that the checker is closed.
+                try
+                {
+                    spellChecker.Close();
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    CheckCommonSuggestions(r);
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    spellChecker.ClearIndex();
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    spellChecker.IndexDictionary(new LuceneDictionary(r, field));
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                try
+                {
+                    spellChecker.SetSpellIndex(spellindex);
+                    Assert.Fail("spellchecker was already closed");
+                }
+                catch (AlreadyClosedException)
+                {
+                    // expected
+                }
+
+                // None of the rejected calls may have created another searcher.
+                Assert.AreEqual(4, searchers.Count);
+                AssertSearchersClosed();
+            }
+            finally
+            {
+                // Release the user-index reader whether or not the assertions passed.
+                r.Close();
+            }
+        }
+
+ /*
+ * Tests that the internally shared IndexSearcher is correctly closed
+ * when the spell checker is concurrently accessed and closed.
+ *
+ * Worker threads keep running CheckCommonSuggestions while this thread
+ * repeatedly resets the spell index and finally closes the spell checker.
+ */
+ [Test]
+ public void TestConcurrentAccess()
+ {
+ // One searcher is already recorded after SetUp; each step below adds one more.
+ Assert.AreEqual(1, searchers.Count);
+ IndexReader r = IndexReader.Open(userindex, true);
+ spellChecker.ClearIndex();
+ Assert.AreEqual(2, searchers.Count);
+ Addwords(r, "field1");
+ Assert.AreEqual(3, searchers.Count);
+ int num_field1 = this.Numdoc();
+ Addwords(r, "field2");
+ Assert.AreEqual(4, searchers.Count);
+ int num_field2 = this.Numdoc();
+ Assert.AreEqual(num_field2, num_field1 + 1);
+ // Between 5 and 9 worker threads (Random.Next(5) returns 0..4).
+ int numThreads = 5 + this.random.Next(5);
+ SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
+ for (int i = 0; i < numThreads; i++)
+ {
+ SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r, this);
+ spellCheckWorker.start();
+ workers[i] = spellCheckWorker;
+
+ }
+ // Between 5 and 9 index resets, 100 ms apart.
+ int iterations = 5 + random.Next(5);
+ for (int i = 0; i < iterations; i++)
+ {
+ Thread.Sleep(100);
+ // concurrently reset the spell index
+ spellChecker.SetSpellIndex(this.spellindex);
+ // for debug - prints the internal Open searchers
+ // showSearchersOpen();
+ }
+
+ spellChecker.Close();
+ // Give the workers up to 5000 ms in total to finish.
+ joinAll(workers, 5000);
+
+ for (int i = 0; i < workers.Length; i++)
+ {
+ Assert.False(workers[i].failed);
+ Assert.True(workers[i].terminated);
+ }
+ // 4 searchers more than iterations
+ // 1. at creation
+ // 2. ClearIndex()
+ // 3. and 4. during Addwords
+ Assert.AreEqual(iterations + 4, searchers.Count);
+ AssertSearchersClosed();
+
+ }
+        /// <summary>
+        /// Joins the workers one after another, sharing a single time budget.
+        /// </summary>
+        /// <param name="workers">Workers to wait for, in order.</param>
+        /// <param name="timeout">
+        /// Total budget in milliseconds. The time spent joining each worker is
+        /// subtracted; once the budget is negative a warning is printed and the
+        /// remaining workers are not joined.
+        /// </param>
+        private void joinAll(SpellCheckWorker[] workers, long timeout)
+        {
+            for (int j = 0; j < workers.Length; j++)
+            {
+                if (timeout < 0)
+                {
+                    // this could be helpful if it Assert.Fails one day
+                    Console.WriteLine("Warning: " + (workers.Length - j)
+                        + " threads have not joined but joinall timed out");
+                    break;
+                }
+
+                // Measure elapsed time as a difference of UTC instants. The previous
+                // local time-of-day reading wraps at midnight and jumps on DST changes,
+                // which could make the remaining budget grow instead of shrink.
+                DateTime started = DateTime.UtcNow;
+                workers[j].join(timeout);
+                timeout -= (long)(DateTime.UtcNow - started).TotalMilliseconds;
+            }
+        }
+
+        /// <summary>
+        /// Asserts that exactly <paramref name="numSearchers"/> searchers were
+        /// recorded, that the most recent one still has a positive reader ref count
+        /// and that all earlier ones do not.
+        /// </summary>
+        private void AssertLastSearcherOpen(int numSearchers)
+        {
+            Assert.AreEqual(numSearchers, searchers.Count);
+
+            Object[] snapshot = searchers.ToArray();
+            int lastIndex = snapshot.Length - 1;
+
+            for (int index = 0; index <= lastIndex; index++)
+            {
+                IndexSearcher searcher = (IndexSearcher)snapshot[index];
+                bool stillOpen = searcher.GetIndexReader().GetRefCount() > 0;
+
+                if (index != lastIndex)
+                {
+                    Assert.False(
+                        stillOpen,
+                        "expected closed searcher but was Open - Index: " + index);
+                }
+                else
+                {
+                    Assert.True(
+                        stillOpen,
+                        "expected last searcher Open but was closed");
+                }
+            }
+        }
+
+        /// <summary>
+        /// Asserts that the reader behind every recorded searcher has a ref count of 0.
+        /// </summary>
+        private void AssertSearchersClosed()
+        {
+            foreach (IndexSearcher searcher in searchers.ToArray())
+            {
+                int refCount = searcher.GetIndexReader().GetRefCount();
+                Assert.AreEqual(0, refCount);
+            }
+        }
+
+        /// <summary>
+        /// Debug helper: prints how many recorded searchers still have a reader
+        /// with a positive ref count.
+        /// </summary>
+        private void ShowSearchersOpen()
+        {
+            int openCount = 0;
+            foreach (IndexSearcher searcher in searchers.ToArray())
+            {
+                bool stillOpen = searcher.GetIndexReader().GetRefCount() > 0;
+                if (stillOpen)
+                {
+                    openCount++;
+                }
+            }
+            Console.WriteLine(openCount);
+        }
+
+
+        /// <summary>
+        /// Background worker that runs the enclosing fixture's CheckCommonSuggestions
+        /// in a loop on its own thread until the spell checker reports that it has
+        /// been closed or another exception occurs.
+        /// </summary>
+        private class SpellCheckWorker
+        {
+            private readonly IndexReader reader;
+
+            // Written on the worker thread and read on the test thread, possibly after a
+            // join that timed out, so both flags are volatile.
+            public volatile bool terminated = false;
+            public volatile bool failed = false;
+
+            private readonly Thread m_thread;
+            private readonly TestSpellChecker enclosingInstance;
+
+            /// <param name="reader">Reader over the user index, passed to each check.</param>
+            /// <param name="enclInstance">Fixture whose CheckCommonSuggestions is invoked.</param>
+            public SpellCheckWorker(IndexReader reader, TestSpellChecker enclInstance)
+            {
+                this.reader = reader;
+                enclosingInstance = enclInstance;
+                m_thread = new Thread(run);
+            }
+
+            /// <summary>
+            /// Thread body. Returns quietly on AlreadyClosedException; any other
+            /// exception (including a failed assertion) sets <c>failed</c>.
+            /// <c>terminated</c> is always set on exit.
+            /// </summary>
+            public void run()
+            {
+                try
+                {
+                    while (true)
+                    {
+                        try
+                        {
+                            enclosingInstance.CheckCommonSuggestions(reader);
+                        }
+                        catch (AlreadyClosedException)
+                        {
+                            // Expected once the test thread closes the spell checker.
+                            return;
+                        }
+                        catch (Exception e)
+                        {
+                            Console.WriteLine(e.StackTrace);
+                            failed = true;
+                            return;
+                        }
+                    }
+                }
+                finally
+                {
+                    this.terminated = true;
+                }
+            }
+
+            /// <summary>Waits up to <paramref name="timeout"/> milliseconds for the thread to end.</summary>
+            public void join(long timeout)
+            {
+                m_thread.Join((int)timeout);
+            }
+
+            /// <summary>Starts the worker thread.</summary>
+            public void start()
+            {
+                m_thread.Start();
+            }
+        }
+
+ /// <summary>
+ /// SpellChecker subclass that records every IndexSearcher returned by
+ /// CreateSearcher so the tests can inspect how many were created and whether
+ /// their readers are still referenced.
+ /// </summary>
+ public class SpellCheckerMock : SpellChecker.Net.Search.Spell.SpellChecker
+ {
+ private TestSpellChecker enclosingInstance;
+ // Initialized in a field initializer, which C# runs before the base constructor,
+ // so the list already exists if the base constructor calls CreateSearcher.
+ // NOTE(review): the base constructor presumably does call it - TestConcurrentAccess
+ // expects one recorded searcher right after construction; confirm in SpellChecker.
+ ArrayList searchers = ArrayList.Synchronized(new ArrayList());
+ /// <summary>
+ /// Creates the mock and publishes its searcher list to <paramref name="inst"/>,
+ /// replacing whatever list the fixture's <c>searchers</c> field held before.
+ /// </summary>
+ public SpellCheckerMock(Directory spellIndex, TestSpellChecker inst)
+ : base(spellIndex)
+ {
+ enclosingInstance = inst;
+ enclosingInstance.searchers = searchers; //Note: this code is invoked after createSearcher
+ }
+
+ /// <summary>
+ /// Creates the mock with an explicit string distance. No fixture is attached,
+ /// so the recorded searchers stay private to this instance.
+ /// </summary>
+ public SpellCheckerMock(Directory spellIndex, StringDistance sd)
+ : base(spellIndex, sd)
+ {
+ }
+
+ /// <summary>
+ /// Delegates to the base implementation and records the returned searcher.
+ /// </summary>
+ public override IndexSearcher CreateSearcher(Directory dir)
+ {
+ IndexSearcher searcher = base.CreateSearcher(dir);
+ searchers.Add(searcher);
+ return searcher;
+ }
+ }
+
+ }
}
\ No newline at end of file