You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/18 16:52:02 UTC
svn commit: r986713 - in /lucene/dev/branches/branch_3x:
lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/ solr/
solr/example/solr/conf/ solr/src/java/org/apache/solr/spelling/
solr/src/test/org/apache/solr/spelling/ solr/src/test/test-files/solr/conf/
Author: gsingers
Date: Wed Aug 18 14:52:01 2010
New Revision: 986713
URL: http://svn.apache.org/viewvc?rev=986713&view=rev
Log:
SOLR-2053: hook in support for specifying comparators
Added:
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java (with props)
Modified:
lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml
Modified: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Wed Aug 18 14:52:01 2010
@@ -176,6 +176,10 @@ public class SpellChecker implements jav
this.comparator = comparator;
}
+ public Comparator<SuggestWord> getComparator() {
+ return comparator;
+ }
+
/**
* Sets the {@link StringDistance} implementation for this
* {@link SpellChecker} instance.
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Wed Aug 18 14:52:01 2010
@@ -193,6 +193,8 @@ New Features
* SOLR-2030: Make FastVectorHighlighter use of SolrEncoder. (koji)
+* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
+
Optimizations
----------------------
Modified: lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/branch_3x/solr/example/solr/conf/solrconfig.xml Wed Aug 18 14:52:01 2010
@@ -704,6 +704,19 @@
<str name="spellcheckIndexDir">./spellchecker2</str>
</lst>
-->
+ <!-- Use an alternate comparator -->
+ <!--<lst name="spellchecker">
+ <str name="name">freq</str>
+ <str name="field">lowerfilt</str>
+ <str name="spellcheckIndexDir">spellcheckerFreq</str>
+ <!– comparatorClass may be one of:
+ 1. score (default)
+ 2. freq (Frequency first, then score)
+ 3. A fully qualified class name
+ –>
+ <str name="comparatorClass">freq</str>
+ <str name="buildOnCommit">true</str>
+ </lst> -->
<!-- a file based spell checker
<lst name="spellchecker">
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java Wed Aug 18 14:52:01 2010
@@ -22,7 +22,12 @@ import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Comparator;
import java.util.List;
+
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -60,6 +65,11 @@ public abstract class AbstractLuceneSpel
public static final String ACCURACY = "accuracy";
public static final String STRING_DISTANCE = "distanceMeasure";
public static final String FIELD_TYPE = "fieldType";
+ public static final String COMPARATOR_CLASS = "comparatorClass";
+
+ public static final String SCORE_COMP = "score";
+ public static final String FREQ_COMP = "freq";
+
protected String field;
protected String fieldTypeName;
protected org.apache.lucene.search.spell.SpellChecker spellChecker;
@@ -89,6 +99,19 @@ public abstract class AbstractLuceneSpel
}
}
sourceLocation = (String) config.get(LOCATION);
+ String compClass = (String) config.get(COMPARATOR_CLASS);
+ Comparator<SuggestWord> comp = null;
+ if (compClass != null){
+ if (compClass.equalsIgnoreCase(SCORE_COMP)){
+ comp = SuggestWordQueue.DEFAULT_COMPARATOR;
+ } else if (compClass.equalsIgnoreCase(FREQ_COMP)){
+ comp = new SuggestWordFrequencyComparator();
+ } else{//must be a FQCN
+ comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass);
+ }
+ } else {
+ comp = SuggestWordQueue.DEFAULT_COMPARATOR;
+ }
field = (String) config.get(FIELD);
String strDistanceName = (String)config.get(STRING_DISTANCE);
if (strDistanceName != null) {
@@ -99,7 +122,7 @@ public abstract class AbstractLuceneSpel
}
try {
initIndex();
- spellChecker = new SpellChecker(index, sd);
+ spellChecker = new SpellChecker(index, sd, comp);
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -230,4 +253,8 @@ public abstract class AbstractLuceneSpel
public StringDistance getStringDistance() {
return sd;
}
+
+ public SpellChecker getSpellChecker() {
+ return spellChecker;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java Wed Aug 18 14:52:01 2010
@@ -27,10 +27,14 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.SearchComponent;
+import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.util.RefCounted;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.AfterClass;
@@ -39,6 +43,7 @@ import org.junit.Test;
import java.io.File;
import java.util.Collection;
+import java.util.Comparator;
import java.util.Date;
import java.util.Map;
@@ -76,6 +81,27 @@ public class IndexBasedSpellCheckerTest
}
@Test
+ public void testComparator() throws Exception {
+ SpellCheckComponent component = (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
+ assertNotNull(component);
+ AbstractLuceneSpellChecker spellChecker;
+ Comparator<SuggestWord> comp;
+ spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("freq");
+ assertNotNull(spellChecker);
+ comp = spellChecker.getSpellChecker().getComparator();
+ assertNotNull(comp);
+ assertTrue(comp instanceof SuggestWordFrequencyComparator);
+
+ spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("fqcn");
+ assertNotNull(spellChecker);
+ comp = spellChecker.getSpellChecker().getComparator();
+ assertNotNull(comp);
+ assertTrue(comp instanceof SampleComparator);
+
+
+ }
+
+ @Test
public void testSpelling() throws Exception {
IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
Added: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java?rev=986713&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java Wed Aug 18 14:52:01 2010
@@ -0,0 +1,36 @@
+package org.apache.solr.spelling;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.spell.SuggestWord;
+
+import java.util.Comparator;
+
+
+/**
+ * Comparator for testing purposes
+ *
+ **/
+public class SampleComparator implements Comparator<SuggestWord> {
+
+
+ @Override
+ public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
+ return suggestWord.string.compareTo(suggestWord1.string);
+ }
+
+}
Propchange: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/spelling/SampleComparator.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml?rev=986713&r1=986712&r2=986713&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/test-files/solr/conf/solrconfig.xml Wed Aug 18 14:52:01 2010
@@ -357,6 +357,27 @@
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
+ <!-- Comparator -->
+ <lst name="spellchecker">
+ <str name="name">freq</str>
+ <str name="field">lowerfilt</str>
+ <str name="spellcheckIndexDir">spellcheckerFreq</str>
+ <!-- comparatorClass may be one of:
+ 1. score (default)
+ 2. freq (Frequency first, then score)
+ 3. A fully qualified class name
+ -->
+ <str name="comparatorClass">freq</str>
+ <str name="buildOnCommit">true</str>
+ </lst>
+ <lst name="spellchecker">
+ <str name="name">fqcn</str>
+ <str name="field">lowerfilt</str>
+ <str name="spellcheckIndexDir">spellcheckerFQCN</str>
+ <str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
+ <str name="buildOnCommit">true</str>
+ </lst>
+
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>