You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by dn...@apache.org on 2007/05/26 13:06:39 UTC

svn commit: r541885 - in /lucene/java/trunk: ./ contrib/spellchecker/src/java/org/apache/lucene/search/spell/ contrib/spellchecker/src/test/org/apache/lucene/search/spell/

Author: dnaber
Date: Sat May 26 04:06:38 2007
New Revision: 541885

URL: http://svn.apache.org/viewvc?view=rev&rev=541885
Log:
LUCENE-886: cleanup: javadoc improvements; don't print exceptions to stderr but re-throw them; new test case

Added:
    lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat May 26 04:06:38 2007
@@ -131,6 +131,10 @@
 17. LUCENE-881: QueryParser.escape() now also escapes the characters
     '|' and '&' which are part of the queryparser syntax. (Michael Busch)
 
+18. LUCENE-886: Spellchecker clean up: exceptions are no longer printed to
+    STDERR and ignored, but are re-thrown instead. Some javadoc improvements.
+    (Daniel Naber)
+
 New features
 
  1. LUCENE-759: Added two n-gram-producing TokenFilters.

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java Sat May 26 04:06:38 2007
@@ -19,7 +19,9 @@
 import java.util.Iterator;
 
 /**
- * A simple interface representing a Dictionary
+ * A simple interface representing a Dictionary. A Dictionary
+ * here is just a list of words.
+ * 
  * @author Nicolas Maisonneuve
  * @version 1.0
  */

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Sat May 26 04:06:38 2007
@@ -27,7 +27,8 @@
 import java.io.*;
 
 /**
- * Lucene Dictionary
+ * Lucene Dictionary: terms taken from the given field
+ * of a Lucene index.
  *
  * @author Nicolas Maisonneuve
  */
@@ -54,7 +55,7 @@
       try {
         termEnum = reader.terms(new Term(field, ""));
       } catch (IOException e) {
-        e.printStackTrace();
+        throw new RuntimeException(e);
       }
     }
 
@@ -86,8 +87,7 @@
         }
         return true;
       } catch (IOException e) {
-        e.printStackTrace();
-        return false;
+        throw new RuntimeException(e);
       }
     }
 

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java Sat May 26 04:06:38 2007
@@ -19,14 +19,11 @@
 
 
 import java.util.Iterator;
-import java.io.InputStream;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.io.*;
 
 
 /**
- * Dictionary represented by a file text.
+ * Dictionary represented by a text file.
  * 
  * <p/>Format allowed: 1 word per line:<br/>
  * word1<br/>
@@ -49,6 +46,13 @@
     in = new BufferedReader(new InputStreamReader(dictFile));
   }
 
+  /**
+   * Create a dictionary based on a reader. Used by the test case.
+   */
+  protected PlainTextDictionary(Reader reader) {
+    in = new BufferedReader(reader);
+  }
+
   public Iterator getWordsIterator() {
     return new fileIterator();
   }
@@ -63,21 +67,19 @@
       return line;
     }
 
-
     public boolean hasNext() {
       hasNextCalled = true;
       try {
         line = in.readLine();
       } catch (IOException ex) {
-        ex.printStackTrace();
-        line = null;
-        return false;
+        throw new RuntimeException(ex);
       }
       return (line != null) ? true : false;
     }
 
 
     public void remove() {
+      throw new UnsupportedOperationException();
     }
   }
 

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Sat May 26 04:06:38 2007
@@ -78,10 +78,25 @@
   // minimum score for hits generated by the spell checker query
   private float minScore = 0.5f;
 
+  /**
+   * Use the given directory as a spell checker index. The directory
+   * is created if it doesn't exist yet.
+   * 
+   * @param spellIndex
+   * @throws IOException
+   */
   public SpellChecker(Directory spellIndex) throws IOException {
     this.setSpellIndex(spellIndex);
   }
 
+  /**
+   * Use a different index as the spell checker index or re-open
+   * the existing index if <code>spellIndex</code> is the same value
+   * as given in the constructor.
+   * 
+   * @param spellIndex
+   * @throws IOException
+   */
   public void setSpellIndex(Directory spellIndex) throws IOException {
     this.spellIndex = spellIndex;
     if (!IndexReader.indexExists(spellIndex)) {
@@ -98,22 +113,23 @@
   /**
    * Sets the accuracy 0 &lt; minScore &lt; 1; default 0.5
    */
-  public void setAccuracy(float min) {
-    this.minScore = min;
+  public void setAccuracy(float minScore) {
+    this.minScore = minScore;
   }
 
   /**
-   * As the Lucene similarity that is used to fetch the most relevant n-grammed terms
+   * Suggest similar words.
+   * 
+   * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
    * is not the same as the edit distance strategy used to calculate the best
    * matching spell-checked word from the hits that Lucene found, one usually has
    * to retrieve a couple of numSug's in order to get the true best match.
    *
-   * I.e. if numSug == 1, don't count on that suggestion being the best one.
+   * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
    * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
    *
-   * Suggest similar words
-   * @param word String the word you want a spell check done on
-   * @param numSug int the number of suggest words
+   * @param word the word you want a spell check done on
+   * @param numSug the number of suggested words
    * @throws IOException
    * @return String[]
    */
@@ -122,24 +138,25 @@
   }
 
   /**
-   * As the Lucene similarity that is used to fetch the most relevant n-grammed terms
+   * Suggest similar words (optionally restricted to a field of an index).
+   * 
+   * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
    * is not the same as the edit distance strategy used to calculate the best
    * matching spell-checked word from the hits that Lucene found, one usually has
    * to retrieve a couple of numSug's in order to get the true best match.
    *
-   * I.e. if numSug == 1, don't count on that suggestion being the best one.
+   * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
    * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
    *
-   * Suggest similar words (restricted or not to a field of a user index)
-   * @param word String the word you want a spell check done on
-   * @param numSug int the number of suggest words
+   * @param word the word you want a spell check done on
+   * @param numSug the number of suggested words
    * @param ir the indexReader of the user index (can be null see field param)
-   * @param field String the field of the user index: if field is not null, the suggested
+   * @param field the field of the user index: if field is not null, the suggested
    * words are restricted to the words present in this field.
-   * @param morePopular boolean return only the suggest words that are more frequent than the searched word
+   * @param morePopular return only the suggested words that are more frequent than the searched word
    * (only if restricted mode = (indexReader!=null and field!=null)
    * @throws IOException
-   * @return String[] the sorted list of the suggest words with this 2 criteria:
+   * @return String[] the sorted list of the suggested words with these 2 criteria:
    * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
    * of the suggest words in the field of the user index
    */
@@ -262,6 +279,10 @@
     return res;
   }
 
+  /**
+   * Removes all terms from the spell check index.
+   * @throws IOException
+   */
   public void clearIndex() throws IOException {
     IndexReader.unlock(spellIndex);
     IndexWriter writer = new IndexWriter(spellIndex, null, true);
@@ -270,7 +291,7 @@
 
   /**
    * Check whether the word exists in the index.
-   * @param word String
+   * @param word
    * @throws IOException
    * @return true iff the word exists in the index
    */
@@ -371,6 +392,9 @@
     }
   }
 
+  /**
+   * Closes the internal IndexReader.
+   */
   protected void finalize() throws Throwable {
     try {
       if (reader != null) {

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html?view=diff&rev=541885&r1=541884&r2=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html Sat May 26 04:06:38 2007
@@ -1,5 +1,6 @@
 <html><head></head>
 <body>
 Suggest alternate spellings for words.
+Also see <a href="http://wiki.apache.org/jakarta-lucene/SpellChecker">the spell checker Wiki page</a>.
 </body>
 </html>

Added: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java?view=auto&rev=541885
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java (added)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java Sat May 26 04:06:38 2007
@@ -0,0 +1,47 @@
+package org.apache.lucene.search.spell;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Test case for PlainTextDictionary: indexes a three-word dictionary read
+ * from a Reader and checks the suggestions returned for a misspelled word.
+ * @author Daniel Naber
+ */
+public class TestPlainTextDictionary extends TestCase {
+
+  public void testBuild() throws IOException {
+    final String LF = System.getProperty("line.separator");
+    String input = "oneword" + LF + "twoword" + LF + "threeword";
+    PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(input));
+    RAMDirectory ramDir = new RAMDirectory();
+    SpellChecker spellChecker = new SpellChecker(ramDir);
+    spellChecker.indexDictionary(ptd);
+    String[] similar = spellChecker.suggestSimilar("treeword", 2);
+    assertEquals(2, similar.length);
+    assertEquals("threeword", similar[0]);  // JUnit order: expected, actual
+    assertEquals("twoword", similar[1]);
+  }
+
+}