You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by dn...@apache.org on 2007/05/31 21:13:37 UTC
svn commit: r543220 - in /lucene/java/trunk: CHANGES.txt
contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
Author: dnaber
Date: Thu May 31 12:13:36 2007
New Revision: 543220
URL: http://svn.apache.org/viewvc?view=rev&rev=543220
Log:
LUCENE-763: LuceneDictionary skips first word in enumeration
Added:
lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=543220&r1=543219&r2=543220
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu May 31 12:13:36 2007
@@ -150,6 +150,9 @@
19. LUCENE-698: FilteredQuery now takes the query boost into account for
scoring. (Michael Busch)
+20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word in
+ enumeration. (Christian Mallwitz via Daniel Naber)
+
New features
1. LUCENE-759: Added two n-gram-producing TokenFilters.
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?view=diff&rev=543220&r1=543219&r2=543220
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Thu May 31 12:13:36 2007
@@ -30,7 +30,11 @@
* Lucene Dictionary: terms taken from the given field
* of a Lucene index.
*
+ * When using IndexReader.terms(Term), the first call on the returned TermEnum
+ * must not be next(), or the first term is skipped; see http://issues.apache.org/jira/browse/LUCENE-6
+ *
* @author Nicolas Maisonneuve
+ * @author Christian Mallwitz
*/
public class LuceneDictionary implements Dictionary {
private IndexReader reader;
@@ -64,6 +68,13 @@
hasNext();
}
hasNextCalled = false;
+
+ try {
+ termEnum.next();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
return (actualTerm != null) ? actualTerm.text() : null;
}
@@ -72,23 +83,23 @@
return actualTerm != null;
}
hasNextCalled = true;
- try {
- // if there are no more words
- if (!termEnum.next()) {
- actualTerm = null;
- return false;
- }
- // if the next word is in the field
- actualTerm = termEnum.term();
- String currentField = actualTerm.field();
- if (currentField != field) {
- actualTerm = null;
- return false;
- }
- return true;
- } catch (IOException e) {
- throw new RuntimeException(e);
+
+ actualTerm = termEnum.term();
+
+ // if there are no words return false
+ if (actualTerm == null) {
+ return false;
}
+
+ String currentField = actualTerm.field();
+
+ // if the next word doesn't have the same field return false
+ if (currentField != field) {
+ actualTerm = null;
+ return false;
+ }
+
+ return true;
}
public void remove() {
Added: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java?view=auto&rev=543220
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java (added)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java Thu May 31 12:13:36 2007
@@ -0,0 +1,201 @@
+package org.apache.lucene.search.spell;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Test case for LuceneDictionary.
+ * setUp() builds a small in-memory index with the fields "aaa", "contents" and "zzz";
+ * each test opens a LuceneDictionary on one field and checks the words it iterates.
+ *
+ * @author Christian Mallwitz
+ */
+public class TestLuceneDictionary extends TestCase {
+
+  private Directory store = new RAMDirectory();
+
+  private IndexReader indexReader = null;
+
+  private LuceneDictionary ld;
+  private Iterator it;
+
+  public void setUp() throws Exception {
+    // five one-word documents; "foo" is added twice but must come back only once for "aaa"
+    IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);
+
+    Document doc;
+
+    doc = new Document();
+    doc.add(new Field("aaa", "foo", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("aaa", "foo", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("contents", "Tom", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("zzz", "bar", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    writer.optimize();
+    writer.close();
+  }
+
+  public void testFieldNonExistent() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+      // setUp() indexes nothing under this field name, so the iterator must start out empty
+      ld = new LuceneDictionary(indexReader, "nonexistent_field");
+      it = ld.getWordsIterator();
+
+      assertFalse("More elements than expected", it.hasNext());
+      assertNull("Nonexistent element isn't null", it.next());
+    } finally {
+      if (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldAaa() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+      // "aaa" is the alphabetically first field of the index (LUCENE-763 dropped the first word)
+      ld = new LuceneDictionary(indexReader, "aaa");
+      it = ld.getWordsIterator();
+
+      assertTrue("First element doesn't exist.", it.hasNext());
+      assertEquals("First element isn't correct", "foo", it.next());
+      assertFalse("More elements than expected", it.hasNext());
+      assertNull("Nonexistent element isn't null", it.next());
+    } finally {
+      if (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldContents_1() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+      // "Jerry" is expected before "Tom", although setUp() indexed "Tom" first
+      ld = new LuceneDictionary(indexReader, "contents");
+      it = ld.getWordsIterator();
+
+      assertTrue("First element doesn't exist.", it.hasNext());
+      assertEquals("First element isn't correct", "Jerry", it.next());
+      assertTrue("Second element doesn't exist.", it.hasNext());
+      assertEquals("Second element isn't correct", "Tom", it.next());
+      assertFalse("More elements than expected", it.hasNext());
+      assertNull("Nonexistent element isn't null", it.next());
+      // a second, fresh iterator over the same field must enumerate exactly two words again
+      ld = new LuceneDictionary(indexReader, "contents");
+      it = ld.getWordsIterator();
+
+      int counter = 2;
+      while (it.hasNext()) {
+        it.next();
+        counter--;
+      }
+
+      assertEquals("Number of words incorrect", 0, counter);
+    }
+    finally {
+      if (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldContents_2() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+
+      ld = new LuceneDictionary(indexReader, "contents");
+      it = ld.getWordsIterator();
+
+      // repeated hasNext() calls must not advance the iterator
+      assertTrue("First element isn't where it should be.", it.hasNext());
+      assertTrue("First element isn't where it should be.", it.hasNext());
+      assertTrue("First element isn't where it should be.", it.hasNext());
+
+      // iterate with next() only, without calling hasNext() in between
+      assertEquals("First element isn't correct", "Jerry", it.next());
+      assertEquals("Second element isn't correct", "Tom", it.next());
+      assertNull("Nonexistent element isn't null", it.next());
+
+      // hasNext() must stay side-effect free once the words are exhausted ...
+      assertFalse("There should not be any more elements", it.hasNext());
+      assertFalse("There should not be any more elements", it.hasNext());
+      assertFalse("There should not be any more elements", it.hasNext());
+
+      // ... and next() must keep returning null past the end
+      assertNull("Nonexistent element isn't null", it.next());
+      assertNull("Nonexistent element isn't null", it.next());
+      assertNull("Nonexistent element isn't null", it.next());
+    }
+    finally {
+      if (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldZzz() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+      // "zzz" is the alphabetically last field of the index
+      ld = new LuceneDictionary(indexReader, "zzz");
+      it = ld.getWordsIterator();
+
+      assertTrue("First element doesn't exist.", it.hasNext());
+      assertEquals("First element isn't correct", "bar", it.next());
+      assertFalse("More elements than expected", it.hasNext());
+      assertNull("Nonexistent element isn't null", it.next());
+    }
+    finally {
+      if (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testSpellchecker() throws IOException {
+    SpellChecker sc = new SpellChecker(new RAMDirectory());
+    indexReader = IndexReader.open(store);
+    try {
+      sc.indexDictionary(new LuceneDictionary(indexReader, "contents"));
+      String[] suggestions = sc.suggestSimilar("Tam", 1);
+      assertEquals(1, suggestions.length);
+      assertEquals("Tom", suggestions[0]);
+      suggestions = sc.suggestSimilar("Jarry", 1);
+      assertEquals(1, suggestions.length);
+      assertEquals("Jerry", suggestions[0]);
+    } finally { indexReader.close(); } // close the reader even when an assertion above fails
+  }
+}