You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by dn...@apache.org on 2007/05/26 13:06:39 UTC
svn commit: r541885 - in /lucene/java/trunk: ./
contrib/spellchecker/src/java/org/apache/lucene/search/spell/
contrib/spellchecker/src/test/org/apache/lucene/search/spell/
Author: dnaber
Date: Sat May 26 04:06:38 2007
New Revision: 541885
URL: http://svn.apache.org/viewvc?view=rev&rev=541885
Log:
LUCENE-886: cleanup: javadoc improvements; don't print exceptions to stderr but re-throw them; new test case
Added:
lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat May 26 04:06:38 2007
@@ -131,6 +131,10 @@
17. LUCENE-881: QueryParser.escape() now also escapes the characters
'|' and '&' which are part of the queryparser syntax. (Michael Busch)
+18. LUCENE-886: Spellchecker clean up: exceptions are no longer printed to
+ STDERR and ignored, but are re-thrown. Some javadoc improvements.
+ (Daniel Naber)
+
New features
1. LUCENE-759: Added two n-gram-producing TokenFilters.
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java Sat May 26 04:06:38 2007
@@ -19,7 +19,9 @@
import java.util.Iterator;
/**
- * A simple interface representing a Dictionary
+ * A simple interface representing a Dictionary. A Dictionary
+ * here is just a list of words.
+ *
* @author Nicolas Maisonneuve
* @version 1.0
*/
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Sat May 26 04:06:38 2007
@@ -27,7 +27,8 @@
import java.io.*;
/**
- * Lucene Dictionary
+ * Lucene Dictionary: terms taken from the given field
+ * of a Lucene index.
*
* @author Nicolas Maisonneuve
*/
@@ -54,7 +55,7 @@
try {
termEnum = reader.terms(new Term(field, ""));
} catch (IOException e) {
- e.printStackTrace();
+ throw new RuntimeException(e);
}
}
@@ -86,8 +87,7 @@
}
return true;
} catch (IOException e) {
- e.printStackTrace();
- return false;
+ throw new RuntimeException(e);
}
}
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java Sat May 26 04:06:38 2007
@@ -19,14 +19,11 @@
import java.util.Iterator;
-import java.io.InputStream;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
import java.io.*;
/**
- * Dictionary represented by a file text.
+ * Dictionary represented by a text file.
*
* <p/>Format allowed: 1 word per line:<br/>
* word1<br/>
@@ -49,6 +46,13 @@
in = new BufferedReader(new InputStreamReader(dictFile));
}
+ /**
+ * Create a dictionary based on a reader. Used by the test case.
+ */
+ protected PlainTextDictionary(Reader reader) {
+ in = new BufferedReader(reader);
+ }
+
public Iterator getWordsIterator() {
return new fileIterator();
}
@@ -63,21 +67,19 @@
return line;
}
-
public boolean hasNext() {
hasNextCalled = true;
try {
line = in.readLine();
} catch (IOException ex) {
- ex.printStackTrace();
- line = null;
- return false;
+ throw new RuntimeException(ex);
}
return (line != null) ? true : false;
}
public void remove() {
+ throw new UnsupportedOperationException();
}
}
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Sat May 26 04:06:38 2007
@@ -78,10 +78,25 @@
// minimum score for hits generated by the spell checker query
private float minScore = 0.5f;
+ /**
+ * Use the given directory as a spell checker index. The directory
+ * is created if it doesn't exist yet.
+ *
+ * @param spellIndex
+ * @throws IOException
+ */
public SpellChecker(Directory spellIndex) throws IOException {
this.setSpellIndex(spellIndex);
}
+ /**
+ * Use a different index as the spell checker index or re-open
+ * the existing index if <code>spellIndex</code> is the same value
+ * as given in the constructor.
+ *
+ * @param spellIndex
+ * @throws IOException
+ */
public void setSpellIndex(Directory spellIndex) throws IOException {
this.spellIndex = spellIndex;
if (!IndexReader.indexExists(spellIndex)) {
@@ -98,22 +113,23 @@
/**
* Sets the accuracy 0 < minScore < 1; default 0.5
*/
- public void setAccuracy(float min) {
- this.minScore = min;
+ public void setAccuracy(float minScore) {
+ this.minScore = minScore;
}
/**
- * As the Lucene similarity that is used to fetch the most relevant n-grammed terms
+ * Suggest similar words.
+ *
+ * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
* is not the same as the edit distance strategy used to calculate the best
* matching spell-checked word from the hits that Lucene found, one usually has
* to retrieve a couple of numSug's in order to get the true best match.
*
- * I.e. if numSug == 1, don't count on that suggestion being the best one.
+ * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
- * Suggest similar words
- * @param word String the word you want a spell check done on
- * @param numSug int the number of suggest words
+ * @param word the word you want a spell check done on
+ * @param numSug the number of suggested words
* @throws IOException
* @return String[]
*/
@@ -122,24 +138,25 @@
}
/**
- * As the Lucene similarity that is used to fetch the most relevant n-grammed terms
+ * Suggest similar words (optionally restricted to a field of an index).
+ *
+ * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
* is not the same as the edit distance strategy used to calculate the best
* matching spell-checked word from the hits that Lucene found, one usually has
* to retrieve a couple of numSug's in order to get the true best match.
*
- * I.e. if numSug == 1, don't count on that suggestion being the best one.
+ * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
- * Suggest similar words (restricted or not to a field of a user index)
- * @param word String the word you want a spell check done on
- * @param numSug int the number of suggest words
+ * @param word the word you want a spell check done on
+ * @param numSug the number of suggested words
* @param ir the indexReader of the user index (can be null, see field param)
- * @param field String the field of the user index: if field is not null, the suggested
+ * @param field the field of the user index: if field is not null, the suggested
* words are restricted to the words present in this field.
- * @param morePopular boolean return only the suggest words that are more frequent than the searched word
+ * @param morePopular return only the suggested words that are more frequent than the searched word
* (only in restricted mode, i.e. indexReader!=null and field!=null)
* @throws IOException
- * @return String[] the sorted list of the suggest words with this 2 criteria:
+ * @return String[] the sorted list of the suggested words, ordered by these 2 criteria:
* first criterion: the edit distance; second criterion (only in restricted mode): the popularity
* of the suggested words in the field of the user index
*/
@@ -262,6 +279,10 @@
return res;
}
+ /**
+ * Removes all terms from the spell check index.
+ * @throws IOException
+ */
public void clearIndex() throws IOException {
IndexReader.unlock(spellIndex);
IndexWriter writer = new IndexWriter(spellIndex, null, true);
@@ -270,7 +291,7 @@
/**
* Check whether the word exists in the index.
- * @param word String
+ * @param word
* @throws IOException
* @return true iff the word exists in the index
*/
@@ -371,6 +392,9 @@
}
}
+ /**
+ * Closes the internal IndexReader.
+ */
protected void finalize() throws Throwable {
try {
if (reader != null) {
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html Sat May 26 04:06:38 2007
@@ -1,5 +1,6 @@
<html><head></head>
<body>
Suggest alternate spellings for words.
+Also see <a href="http://wiki.apache.org/jakarta-lucene/SpellChecker">the spell checker Wiki page</a>.
</body>
</html>
Added: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java?view=auto&rev=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java (added)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java Sat May 26 04:06:38 2007
@@ -0,0 +1,47 @@
+package org.apache.lucene.search.spell;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Test case for PlainTextDictionary
+ *
+ * @author Daniel Naber
+ */
+public class TestPlainTextDictionary extends TestCase {
+
+ public void testBuild() throws IOException {
+ final String LF = System.getProperty("line.separator");
+ String input = "oneword" + LF + "twoword" + LF + "threeword";
+ PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(input));
+ RAMDirectory ramDir = new RAMDirectory();
+ SpellChecker spellChecker = new SpellChecker(ramDir);
+ spellChecker.indexDictionary(ptd);
+ String[] similar = spellChecker.suggestSimilar("treeword", 2);
+ assertEquals(2, similar.length);
+ assertEquals(similar[0], "threeword");
+ assertEquals(similar[1], "twoword");
+ }
+
+}