You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/17 22:40:59 UTC
svn commit: r986477 - in /lucene/dev/trunk: lucene/contrib/
lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/
lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/
solr/src/java/org/apache/solr/handler/component/
Author: gsingers
Date: Tue Aug 17 20:40:58 2010
New Revision: 986477
URL: http://svn.apache.org/viewvc?rev=986477&view=rev
Log:
LUCENE-2479: Add support for alternate comparators for spelling
Added:
lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java
lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java
lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java
lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Aug 17 20:40:58 2010
@@ -20,6 +20,9 @@ New Features
code is refactored to support append-only FS, and to allow for future
customization of per-segment information. (Andrzej Bialecki)
+ * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
+ with two implementations. The existing comparator (score, then frequency) is the default. (Grant Ingersoll)
+
======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy
Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Tue Aug 17 20:40:58 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spell;
*/
import java.io.IOException;
+import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
@@ -103,6 +104,8 @@ public class SpellChecker implements jav
private StringDistance sd;
+ private Comparator<SuggestWord> comparator;
+
/**
* Use the given directory as a spell checker index. The directory
* is created if it doesn't exist yet.
@@ -111,8 +114,7 @@ public class SpellChecker implements jav
* @throws IOException if Spellchecker can not open the directory
*/
public SpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
- setSpellIndex(spellIndex);
- setStringDistance(sd);
+ this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR);
}
/**
* Use the given directory as a spell checker index with a
@@ -127,6 +129,20 @@ public class SpellChecker implements jav
public SpellChecker(Directory spellIndex) throws IOException {
this(spellIndex, new LevensteinDistance());
}
+
+  /**
+   * Use the given directory as a spell checker index with the given {@link org.apache.lucene.search.spell.StringDistance} measure
+   * and the given {@link java.util.Comparator} for sorting the results.
+   * @param spellIndex The spelling index
+   * @param sd The distance
+   * @param comparator The comparator; if null, {@link SuggestWordQueue#DEFAULT_COMPARATOR} is used instead
+   * @throws IOException if there is a problem opening the index
+   */
+  public SpellChecker(Directory spellIndex, StringDistance sd, Comparator<SuggestWord> comparator) throws IOException {
+    setSpellIndex(spellIndex);
+    setStringDistance(sd);
+    this.comparator = (comparator != null) ? comparator : SuggestWordQueue.DEFAULT_COMPARATOR;
+  }
/**
* Use a different index as the spell checker index or re-open
@@ -151,6 +167,15 @@ public class SpellChecker implements jav
swapSearcher(spellIndexDir);
}
}
+
+  /**
+   * Sets the {@link java.util.Comparator} for the {@link SuggestWordQueue}.
+   * @param comparator the comparator; if null, {@link SuggestWordQueue#DEFAULT_COMPARATOR} is used instead
+   */
+  public void setComparator(Comparator<SuggestWord> comparator) {
+    this.comparator = (comparator != null) ? comparator : SuggestWordQueue.DEFAULT_COMPARATOR;
+  }
+
/**
* Sets the {@link StringDistance} implementation for this
* {@link SpellChecker} instance.
@@ -271,7 +296,7 @@ public class SpellChecker implements jav
// System.out.println("Q: " + query);
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
// System.out.println("HITS: " + hits.length());
- SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
+ SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
// go thru more than 'maxr' matches in case the distance filter triggers
int stop = Math.min(hits.length, maxHits);
Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java Tue Aug 17 20:40:58 2010
@@ -1,5 +1,7 @@
package org.apache.lucene.search.spell;
+import java.util.Comparator;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,10 +22,13 @@ package org.apache.lucene.search.spell;
/**
* SuggestWord, used in suggestSimilar method in SpellChecker class.
+ * <p/>
+ * Default sort is first by score, then by frequency.
*
*
*/
-final class SuggestWord {
+public final class SuggestWord{
+
/**
* the score of the word
*/
@@ -39,23 +44,4 @@ final class SuggestWord {
*/
public String string;
- public final int compareTo(SuggestWord a) {
- // first criteria: the edit distance
- if (score > a.score) {
- return 1;
- }
- if (score < a.score) {
- return -1;
- }
-
- // second criteria (if first criteria is equal): the popularity
- if (freq > a.freq) {
- return 1;
- }
-
- if (freq < a.freq) {
- return -1;
- }
- return 0;
- }
-}
+}
\ No newline at end of file
Added: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java?rev=986477&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java (added)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java Tue Aug 17 20:40:58 2010
@@ -0,0 +1,47 @@
+package org.apache.lucene.search.spell;
+
+import java.util.Comparator;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Orders {@link SuggestWord}s by frequency first, then by score.
+ *
+ **/
+public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
+
+  /**
+   * Compares by {@code freq}; when the frequencies are equal, compares by {@code score}.
+   *
+   * @return 1 if {@code first} is greater, -1 if it is smaller, 0 if both keys are equal
+   */
+  @Override
+  public int compare(SuggestWord first, SuggestWord second) {
+    // primary key: the frequency
+    final int byFreq = first.freq > second.freq ? 1 : (first.freq < second.freq ? -1 : 0);
+    if (byFreq != 0) {
+      return byFreq;
+    }
+
+    // tie-breaker: the score
+    if (first.score > second.score) {
+      return 1;
+    }
+    return first.score < second.score ? -1 : 0;
+  }
+}
Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java Tue Aug 17 20:40:58 2010
@@ -20,20 +20,44 @@ package org.apache.lucene.search.spell;
import org.apache.lucene.util.PriorityQueue;
+import java.util.Comparator;
+
/**
* Sorts SuggestWord instances
*
+ * @see org.apache.lucene.search.spell.SuggestWordScoreComparator
+ * @see org.apache.lucene.search.spell.SuggestWordFrequencyComparator
+ *
*/
-final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
+public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
+ public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();
+
+
+ private Comparator<SuggestWord> comparator;
+
+ /**
+ * Use the {@link #DEFAULT_COMPARATOR}
+ * @param size The size of the queue
+ */
+ public SuggestWordQueue (int size) {
+ initialize(size);
+ comparator = DEFAULT_COMPARATOR;
+ }
- SuggestWordQueue (int size) {
+ /**
+ * Specify the size of the queue and the comparator to use for sorting.
+ * @param size The size
+ * @param comparator The comparator.
+ */
+ public SuggestWordQueue(int size, Comparator<SuggestWord> comparator){
initialize(size);
+ this.comparator = comparator;
}
@Override
protected final boolean lessThan (SuggestWord wa, SuggestWord wb) {
- int val = wa.compareTo(wb);
+ int val = comparator.compare(wa, wb);
return val < 0;
}
}
Added: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java?rev=986477&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java (added)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java Tue Aug 17 20:40:58 2010
@@ -0,0 +1,47 @@
+package org.apache.lucene.search.spell;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Comparator;
+
+
+/**
+ * Orders {@link SuggestWord}s by score first, then by frequency. Public because it backs
+ * the public {@link SuggestWordQueue#DEFAULT_COMPARATOR} and mirrors {@link SuggestWordFrequencyComparator}.
+ **/
+public class SuggestWordScoreComparator implements Comparator<SuggestWord> {
+  /** Returns 1, -1 or 0 as {@code first} is greater than, less than or equal to {@code second}. */
+  @Override
+  public int compare(SuggestWord first, SuggestWord second) {
+    // first criteria: the distance
+    if (first.score > second.score) {
+      return 1;
+    }
+    if (first.score < second.score) {
+      return -1;
+    }
+
+    // second criteria (if first criteria is equal): the popularity
+    if (first.freq > second.freq) {
+      return 1;
+    }
+    if (first.freq < second.freq) {
+      return -1;
+    }
+    return 0;
+  }
+}
Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java Tue Aug 17 20:40:58 2010
@@ -20,6 +20,7 @@ package org.apache.lucene.search.spell;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Comparator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutorService;
@@ -61,6 +62,7 @@ public class TestSpellChecker extends Lu
Document doc = new Document();
doc.add(new Field("field1", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("field2", English.intToEnglish(i + 1), Field.Store.YES, Field.Index.ANALYZED)); // + word thousand
+ doc.add(new Field("field3", "fvei" + (i % 2 == 0 ? " five" : ""), Field.Store.YES, Field.Index.ANALYZED)); // "fvei" in every doc, "five" only in even ones
writer.addDocument(doc);
}
writer.close();
@@ -85,10 +87,10 @@ public class TestSpellChecker extends Lu
spellChecker.clearIndex();
- addwords(r, "field1");
+ addwords(r, spellChecker, "field1");
int num_field1 = this.numdoc();
- addwords(r, "field2");
+ addwords(r, spellChecker, "field2");
int num_field2 = this.numdoc();
assertEquals(num_field2, num_field1 + 1);
@@ -110,6 +112,25 @@ public class TestSpellChecker extends Lu
r.close();
}
+  // Checks that a SpellChecker built with SuggestWordFrequencyComparator returns "fvei" ahead of "five".
+  public void testComparator() throws Exception {
+    IndexReader r = IndexReader.open(userindex, true);
+    Directory compIdx = newDirectory(random);
+    SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator());
+    try {
+      addwords(r, compareSP, "field3");
+      String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3", false);
+      assertEquals(2, similar.length);
+      //five and fvei have the same score, but different frequencies.
+      assertEquals("fvei", similar[0]);
+      assertEquals("five", similar[1]);
+    } finally {
+      r.close();
+      if (!compareSP.isClosed()) { compareSP.close(); }
+      compIdx.close();
+    }
+  }
+
private void checkCommonSuggestions(IndexReader r) throws IOException {
String[] similar = spellChecker.suggestSimilar("fvie", 2);
assertTrue(similar.length > 0);
@@ -204,9 +225,9 @@ public class TestSpellChecker extends Lu
assertEquals(similar[1], "ninety");
}
- private void addwords(IndexReader r, String field) throws IOException {
+ private void addwords(IndexReader r, SpellChecker sc, String field) throws IOException {
long time = System.currentTimeMillis();
- spellChecker.indexDictionary(new LuceneDictionary(r, field));
+ sc.indexDictionary(new LuceneDictionary(r, field));
time = System.currentTimeMillis() - time;
//System.out.println("time to build " + field + ": " + time);
}
@@ -224,9 +245,9 @@ public class TestSpellChecker extends Lu
IndexReader r = IndexReader.open(userindex, true);
spellChecker.clearIndex();
String field = "field1";
- addwords(r, "field1");
+ addwords(r, spellChecker, "field1");
int num_field1 = this.numdoc();
- addwords(r, "field2");
+ addwords(r, spellChecker, "field2");
int num_field2 = this.numdoc();
assertEquals(num_field2, num_field1 + 1);
checkCommonSuggestions(r);
@@ -280,10 +301,10 @@ public class TestSpellChecker extends Lu
final IndexReader r = IndexReader.open(userindex, true);
spellChecker.clearIndex();
assertEquals(2, searchers.size());
- addwords(r, "field1");
+ addwords(r, spellChecker, "field1");
assertEquals(3, searchers.size());
int num_field1 = this.numdoc();
- addwords(r, "field2");
+ addwords(r, spellChecker, "field2");
assertEquals(4, searchers.size());
int num_field2 = this.numdoc();
assertEquals(num_field2, num_field1 + 1);
@@ -396,6 +417,10 @@ public class TestSpellChecker extends Lu
super(spellIndex, sd);
}
+ public SpellCheckerMock(Directory spellIndex, StringDistance sd, Comparator<SuggestWord> comparator) throws IOException {
+ super(spellIndex, sd, comparator);
+ }
+
@Override
IndexSearcher createSearcher(Directory dir) throws IOException {
IndexSearcher searcher = super.createSearcher(dir);
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Tue Aug 17 20:40:58 2010
@@ -24,6 +24,8 @@ import java.util.concurrent.ConcurrentHa
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.slf4j.Logger;
@@ -157,59 +159,7 @@ public class SpellCheckComponent extends
}
}
- static class SuggestWordQueue extends PriorityQueue {
- SuggestWordQueue(int size) {
- initialize(size);
- }
-
- @Override
- protected boolean lessThan(Object a, Object b) {
- SuggestWord wa = (SuggestWord) a;
- SuggestWord wb = (SuggestWord) b;
- int val = wa.compareTo(wb);
- return val < 0;
- }
- }
-
- /**
- * Borrowed from Lucene SpellChecker
- */
- static class SuggestWord {
- /**
- * the score of the word
- */
- public float score;
-
- /**
- * The freq of the word
- */
- public int freq;
-
- /**
- * the suggested word
- */
- public String string;
- public final int compareTo(SuggestWord a) {
- // first criteria: the edit distance
- if (score > a.score) {
- return 1;
- }
- if (score < a.score) {
- return -1;
- }
-
- // second criteria (if first criteria is equal): the popularity
- if (freq > a.freq) {
- return 1;
- }
-
- if (freq < a.freq) {
- return -1;
- }
- return 0;
- }
- }
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {