You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/17 22:40:59 UTC

svn commit: r986477 - in /lucene/dev/trunk: lucene/contrib/ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/ lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/ solr/src/java/org/apache/solr/handler/component/

Author: gsingers
Date: Tue Aug 17 20:40:58 2010
New Revision: 986477

URL: http://svn.apache.org/viewvc?rev=986477&view=rev
Log:
LUCENE-2479: Add support for alternate comparators for spelling

Added:
    lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java
    lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java
Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
    lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java
    lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java
    lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Aug 17 20:40:58 2010
@@ -20,6 +20,9 @@ New Features
     code is refactored to support append-only FS, and to allow for future
     customization of per-segment information. (Andrzej Bialecki)
 
+  * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
+    with two implementations.  The existing comparator (score, then frequency) is the default (Grant Ingersoll)
+
 ======================= Lucene 3.x (not yet released) =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Tue Aug 17 20:40:58 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spell;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 import java.util.Iterator;
 
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
@@ -103,6 +104,8 @@ public class SpellChecker implements jav
   
   private StringDistance sd;
 
+  private Comparator<SuggestWord> comparator;
+
   /**
    * Use the given directory as a spell checker index. The directory
    * is created if it doesn't exist yet.
@@ -111,8 +114,7 @@ public class SpellChecker implements jav
    * @throws IOException if Spellchecker can not open the directory
    */
   public SpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
-    setSpellIndex(spellIndex);
-    setStringDistance(sd);
+    this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR);
   }
   /**
    * Use the given directory as a spell checker index with a
@@ -127,6 +129,20 @@ public class SpellChecker implements jav
   public SpellChecker(Directory spellIndex) throws IOException {
     this(spellIndex, new LevensteinDistance());
   }
+
+  /**
+   * Use the given directory as a spell checker index with the given {@link org.apache.lucene.search.spell.StringDistance} measure
+   * and the given {@link java.util.Comparator} for sorting the results.
+   * @param spellIndex The spelling index
+   * @param sd The distance
+   * @param comparator The comparator
+   * @throws IOException if there is a problem opening the index
+   */
+  public SpellChecker(Directory spellIndex, StringDistance sd, Comparator<SuggestWord> comparator) throws IOException {
+    setSpellIndex(spellIndex);
+    setStringDistance(sd);
+    this.comparator = comparator;
+  }
   
   /**
    * Use a different index as the spell checker index or re-open
@@ -151,6 +167,15 @@ public class SpellChecker implements jav
       swapSearcher(spellIndexDir);
     }
   }
+
+  /**
+   * Sets the {@link java.util.Comparator} for the {@link SuggestWordQueue}.
+   * @param comparator the comparator
+   */
+  public void setComparator(Comparator<SuggestWord> comparator) {
+    this.comparator = comparator;
+  }
+
   /**
    * Sets the {@link StringDistance} implementation for this
    * {@link SpellChecker} instance.
@@ -271,7 +296,7 @@ public class SpellChecker implements jav
   //    System.out.println("Q: " + query);
       ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
   //    System.out.println("HITS: " + hits.length());
-      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
+      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
   
       // go thru more than 'maxr' matches in case the distance filter triggers
       int stop = Math.min(hits.length, maxHits);

Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java Tue Aug 17 20:40:58 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.search.spell;
 
+import java.util.Comparator;
+
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,10 +22,13 @@ package org.apache.lucene.search.spell;
 
 /**
  *  SuggestWord, used in suggestSimilar method in SpellChecker class.
+ * <p/>
+ * Default sort is first by score, then by frequency.
  * 
  *
  */
-final class SuggestWord {
+public final class SuggestWord{
+  
   /**
    * the score of the word
    */
@@ -39,23 +44,4 @@ final class SuggestWord {
    */
   public String string;
 
-  public final int compareTo(SuggestWord a) {
-    // first criteria: the edit distance
-    if (score > a.score) {
-      return 1;
-    }
-    if (score < a.score) {
-      return -1;
-    }
-
-    // second criteria (if first criteria is equal): the popularity
-    if (freq > a.freq) {
-      return 1;
-    }
-
-    if (freq < a.freq) {
-      return -1;
-    }
-    return 0;
-  }
-}
+}
\ No newline at end of file

Added: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java?rev=986477&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java (added)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java Tue Aug 17 20:40:58 2010
@@ -0,0 +1,47 @@
+package org.apache.lucene.search.spell;
+
+import java.util.Comparator;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ *  Compares SuggestWords by frequency first, then by score.
+ *
+ **/
+public class SuggestWordFrequencyComparator implements Comparator<SuggestWord> {
+
+  @Override
+  public int compare(SuggestWord first, SuggestWord second) {
+    // first criteria: the frequency
+    if (first.freq > second.freq) {
+      return 1;
+    }
+    if (first.freq < second.freq) {
+      return -1;
+    }
+
+    // second criteria (if first criteria is equal): the score
+    if (first.score > second.score) {
+      return 1;
+    }
+    if (first.score < second.score) {
+      return -1;
+    }
+    return 0;
+  }
+}

Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java Tue Aug 17 20:40:58 2010
@@ -20,20 +20,44 @@ package org.apache.lucene.search.spell;
 
 import org.apache.lucene.util.PriorityQueue;
 
+import java.util.Comparator;
+
 
 /**
  * Sorts SuggestWord instances
  *
+ * @see org.apache.lucene.search.spell.SuggestWordScoreComparator
+ * @see org.apache.lucene.search.spell.SuggestWordFrequencyComparator
+ *
  */
-final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
+public final class SuggestWordQueue extends PriorityQueue<SuggestWord> {
+  public static final Comparator<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();
+
+
+  private Comparator<SuggestWord> comparator;
+
+  /**
+   * Use the {@link #DEFAULT_COMPARATOR}
+   * @param size The size of the queue
+   */
+  public SuggestWordQueue (int size) {
+    initialize(size);
+    comparator = DEFAULT_COMPARATOR;
+  }
 
-  SuggestWordQueue (int size) {
+  /**
+   * Specify the size of the queue and the comparator to use for sorting.
+   * @param size The size
+   * @param comparator The comparator.
+   */
+  public SuggestWordQueue(int size, Comparator<SuggestWord> comparator){
     initialize(size);
+    this.comparator = comparator;
   }
 
   @Override
   protected final boolean lessThan (SuggestWord wa, SuggestWord wb) {
-    int val = wa.compareTo(wb);
+    int val = comparator.compare(wa, wb);
     return val < 0;
   }
 }

Added: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java?rev=986477&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java (added)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java Tue Aug 17 20:40:58 2010
@@ -0,0 +1,47 @@
+package org.apache.lucene.search.spell;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Comparator;
+
+
+/**
+ * Score first, then frequency
+ *
+ **/
+class SuggestWordScoreComparator implements Comparator<SuggestWord> {
+  @Override
+  public int compare(SuggestWord first, SuggestWord second) {
+    // first criteria: the distance
+    if (first.score > second.score) {
+      return 1;
+    }
+    if (first.score < second.score) {
+      return -1;
+    }
+
+    // second criteria (if first criteria is equal): the popularity
+    if (first.freq > second.freq) {
+      return 1;
+    }
+
+    if (first.freq < second.freq) {
+      return -1;
+    }
+    return 0;
+  }
+}

Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java Tue Aug 17 20:40:58 2010
@@ -20,6 +20,7 @@ package org.apache.lucene.search.spell;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
 import java.util.Random;
 import java.util.concurrent.ExecutorService;
@@ -61,6 +62,7 @@ public class TestSpellChecker extends Lu
       Document doc = new Document();
       doc.add(new Field("field1", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
       doc.add(new Field("field2", English.intToEnglish(i + 1), Field.Store.YES, Field.Index.ANALYZED)); // + word thousand
+      doc.add(new Field("field3", "fvei" + (i % 2 == 0 ? " five" : ""), Field.Store.YES, Field.Index.ANALYZED)); // "fvei" in every doc, "five" only in even ones, so the two words get different frequencies
       writer.addDocument(doc);
     }
     writer.close();
@@ -85,10 +87,10 @@ public class TestSpellChecker extends Lu
 
     spellChecker.clearIndex();
 
-    addwords(r, "field1");
+    addwords(r, spellChecker, "field1");
     int num_field1 = this.numdoc();
 
-    addwords(r, "field2");
+    addwords(r, spellChecker, "field2");
     int num_field2 = this.numdoc();
 
     assertEquals(num_field2, num_field1 + 1);
@@ -110,6 +112,25 @@ public class TestSpellChecker extends Lu
     r.close();
   }
 
+  public void testComparator() throws Exception {
+    IndexReader r = IndexReader.open(userindex, true);
+    Directory compIdx = newDirectory(random);
+    SpellChecker compareSP = new SpellCheckerMock(compIdx, new LevensteinDistance(), new SuggestWordFrequencyComparator());
+    addwords(r, compareSP, "field3");
+
+    String[] similar = compareSP.suggestSimilar("fvie", 2, r, "field3", false);
+    assertTrue(similar.length == 2);
+    //five and fvei have the same score, but different frequencies.
+    assertEquals("fvei", similar[0]);
+    assertEquals("five", similar[1]);
+    r.close();
+    if (!compareSP.isClosed())
+      compareSP.close();
+    compIdx.close();
+
+
+  }
+
   private void checkCommonSuggestions(IndexReader r) throws IOException {
     String[] similar = spellChecker.suggestSimilar("fvie", 2);
     assertTrue(similar.length > 0);
@@ -204,9 +225,9 @@ public class TestSpellChecker extends Lu
     assertEquals(similar[1], "ninety");
   }
 
-  private void addwords(IndexReader r, String field) throws IOException {
+  private void addwords(IndexReader r, SpellChecker sc, String field) throws IOException {
     long time = System.currentTimeMillis();
-    spellChecker.indexDictionary(new LuceneDictionary(r, field));
+    sc.indexDictionary(new LuceneDictionary(r, field));
     time = System.currentTimeMillis() - time;
     //System.out.println("time to build " + field + ": " + time);
   }
@@ -224,9 +245,9 @@ public class TestSpellChecker extends Lu
     IndexReader r = IndexReader.open(userindex, true);
     spellChecker.clearIndex();
     String field = "field1";
-    addwords(r, "field1");
+    addwords(r, spellChecker, "field1");
     int num_field1 = this.numdoc();
-    addwords(r, "field2");
+    addwords(r, spellChecker, "field2");
     int num_field2 = this.numdoc();
     assertEquals(num_field2, num_field1 + 1);
     checkCommonSuggestions(r);
@@ -280,10 +301,10 @@ public class TestSpellChecker extends Lu
     final IndexReader r = IndexReader.open(userindex, true);
     spellChecker.clearIndex();
     assertEquals(2, searchers.size());
-    addwords(r, "field1");
+    addwords(r, spellChecker, "field1");
     assertEquals(3, searchers.size());
     int num_field1 = this.numdoc();
-    addwords(r, "field2");
+    addwords(r, spellChecker, "field2");
     assertEquals(4, searchers.size());
     int num_field2 = this.numdoc();
     assertEquals(num_field2, num_field1 + 1);
@@ -396,6 +417,10 @@ public class TestSpellChecker extends Lu
       super(spellIndex, sd);
     }
 
+    public SpellCheckerMock(Directory spellIndex, StringDistance sd, Comparator<SuggestWord> comparator) throws IOException {
+      super(spellIndex, sd, comparator);
+    }
+
     @Override
     IndexSearcher createSearcher(Directory dir) throws IOException {
       IndexSearcher searcher = super.createSearcher(dir);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=986477&r1=986476&r2=986477&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Tue Aug 17 20:40:58 2010
@@ -24,6 +24,8 @@ import java.util.concurrent.ConcurrentHa
 
 import org.apache.lucene.search.spell.LevensteinDistance;
 import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.solr.client.solrj.response.SpellCheckResponse;
 import org.slf4j.Logger;
@@ -157,59 +159,7 @@ public class SpellCheckComponent extends
     }
   }
 
-  static class SuggestWordQueue extends PriorityQueue {
-    SuggestWordQueue(int size) {
-      initialize(size);
-    }
-
-    @Override
-    protected boolean lessThan(Object a, Object b) {
-      SuggestWord wa = (SuggestWord) a;
-      SuggestWord wb = (SuggestWord) b;
-      int val = wa.compareTo(wb);
-      return val < 0;
-    }
-  }
-
-  /**
-   * Borrowed from Lucene SpellChecker
-   */
-  static class SuggestWord {
-    /**
-     * the score of the word
-     */
-    public float score;
-
-    /**
-     * The freq of the word
-     */
-    public int freq;
-
-    /**
-     * the suggested word
-     */
-    public String string;
 
-    public final int compareTo(SuggestWord a) {
-      // first criteria: the edit distance
-      if (score > a.score) {
-        return 1;
-      }
-      if (score < a.score) {
-        return -1;
-      }
-
-      // second criteria (if first criteria is equal): the popularity
-      if (freq > a.freq) {
-        return 1;
-      }
-
-      if (freq < a.freq) {
-        return -1;
-      }
-      return 0;
-    }
-  }
 
   @Override
   public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {