You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by eh...@apache.org on 2005/04/12 01:48:03 UTC

svn commit: r160987 - in lucene/java/trunk/contrib/spellchecker/src: java/org/apache/lucene/search/spell/LuceneDictionary.java java/org/apache/lucene/search/spell/PlainTextDictionary.java java/org/apache/lucene/search/spell/SpellChecker.java test/org/apache/lucene/search/spell/TestSpellChecker.java

Author: ehatcher
Date: Mon Apr 11 16:48:02 2005
New Revision: 160987

URL: http://svn.apache.org/viewcvs?view=rev&rev=160987
Log:
Fixed deprecation issues, adjusted test cases to make better use of assertEquals, and reformatted code style.

Modified:
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
    lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?view=diff&r1=160986&r2=160987
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Mon Apr 11 16:48:02 2005
@@ -1,7 +1,7 @@
 package org.apache.lucene.search.spell;
 
 /**
- * Copyright 2002-2004 The Apache Software Foundation
+ * Copyright 2002-2005 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,78 +17,79 @@
  */
 
 import org.apache.lucene.index.IndexReader;
+
 import java.util.Iterator;
+
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.Term;
+
 import java.io.*;
 
 /**
- *  Lucene Dictionnary
+ * Lucene Dictionnary
+ *
  * @author Nicolas Maisonneuve
  */
-public class LuceneDictionary
-implements Dictionary {
-    IndexReader reader;
-    String field;
-
-    public LuceneDictionary (IndexReader reader, String field) {
-        this.reader=reader;
-        this.field=field;
-
+public class LuceneDictionary implements Dictionary {
+  IndexReader reader;
+  String field;
+
+  public LuceneDictionary(IndexReader reader, String field) {
+    this.reader = reader;
+    this.field = field;
+  }
+
+  public final Iterator getWordsIterator() {
+    return new LuceneIterator();
+  }
+
+
+  final class LuceneIterator implements Iterator {
+    private TermEnum enum;
+    private Term actualTerm;
+    private boolean has_next_called;
+
+    public LuceneIterator() {
+      try {
+        enum = reader.terms(new Term(field, ""));
+      } catch (IOException ex) {
+        ex.printStackTrace();
+      }
     }
 
 
-    public final Iterator getWordsIterator () {
-        return new LuceneIterator();
+    public Object next() {
+      if (!has_next_called) {
+        hasNext();
+      }
+      has_next_called = false;
+      return (actualTerm != null) ? actualTerm.text() : null;
     }
 
 
-final  class LuceneIterator    implements Iterator {
-      private  TermEnum enum;
-      private  Term actualTerm;
-      private  boolean has_next_called;
-
-        public LuceneIterator () {
-            try {
-                enum=reader.terms(new Term(field, ""));
-            }
-            catch (IOException ex) {
-                ex.printStackTrace();
-            }
+    public boolean hasNext() {
+      has_next_called = true;
+      try {
+        // if there is still words
+        if (!enum.next()) {
+          actualTerm = null;
+          return false;
         }
-
-
-        public Object next () {
-            if (!has_next_called)  {hasNext();}
-             has_next_called=false;
-            return (actualTerm!=null) ? actualTerm.text(): null;
+        //  if the next word are in the field
+        actualTerm = enum.term();
+        String fieldt = actualTerm.field();
+        if (fieldt != field) {
+          actualTerm = null;
+          return false;
         }
-
-
-        public boolean hasNext () {
-             has_next_called=true;
-            try {
-                // if there is still words
-                if (!enum.next()) {
-                    actualTerm=null;
-                    return false;
-                }
-                //  if the next word are in the field
-                actualTerm=enum.term();
-                String fieldt=actualTerm.field();
-                if (fieldt!=field) {
-                    actualTerm=null;
-                    return false;
-                }
-                return true;
-            }
-            catch (IOException ex) {
-                ex.printStackTrace();
-                return false;
-            }
-        }
-
-
-        public void remove () {};
+        return true;
+      } catch (IOException ex) {
+        ex.printStackTrace();
+        return false;
+      }
     }
+
+    public void remove() {
+    };
+  }
 }

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java?view=diff&r1=160986&r2=160987
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java Mon Apr 11 16:48:02 2005
@@ -1,7 +1,7 @@
 package org.apache.lucene.search.spell;
 
 /**
- * Copyright 2002-2004 The Apache Software Foundation
+ * Copyright 2002-2005 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,52 +35,48 @@
  */
 public class PlainTextDictionary implements Dictionary {
 
-    private BufferedReader in;
-    private String line;
-    private boolean has_next_called;
-
-    public PlainTextDictionary (File file) throws FileNotFoundException {
-        in=new BufferedReader(new FileReader(file));
-    }
-
-
-    public PlainTextDictionary (InputStream dictFile) {
-        in=new BufferedReader(new InputStreamReader(dictFile));
+  private BufferedReader in;
+  private String line;
+  private boolean has_next_called;
+
+  public PlainTextDictionary(File file) throws FileNotFoundException {
+    in = new BufferedReader(new FileReader(file));
+  }
+
+  public PlainTextDictionary(InputStream dictFile) {
+    in = new BufferedReader(new InputStreamReader(dictFile));
+  }
+
+  public Iterator getWordsIterator() {
+    return new fileIterator();
+  }
+
+
+  final class fileIterator implements Iterator {
+    public Object next() {
+      if (!has_next_called) {
+        hasNext();
+      }
+      has_next_called = false;
+      return line;
     }
 
 
-    public Iterator getWordsIterator () {
-
-        return new fileIterator();
+    public boolean hasNext() {
+      has_next_called = true;
+      try {
+        line = in.readLine();
+      } catch (IOException ex) {
+        ex.printStackTrace();
+        line = null;
+        return false;
+      }
+      return (line != null) ? true : false;
     }
 
 
-    final class fileIterator
-    implements Iterator {
-        public Object next () {
-            if (!has_next_called) {
-                hasNext();
-            }
-            has_next_called=false;
-            return line;
-        }
-
-
-        public boolean hasNext () {
-            has_next_called=true;
-            try {
-                line=in.readLine();
-            }
-            catch (IOException ex) {
-                ex.printStackTrace();
-                line=null;
-                return false;
-            }
-            return (line!=null)?true:false;
-        }
-
-
-        public void remove () {};
-    }
+    public void remove() {
+    };
+  }
 
 }

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?view=diff&r1=160986&r2=160987
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Mon Apr 11 16:48:02 2005
@@ -26,7 +26,6 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Hits;
@@ -113,7 +112,7 @@
      * Suggest similar words (restricted or not of a field of a user index)
      * @param word String the word you want a spell check done on
      * @param num_sug int the number of suggest words
-     * @param IndexReader the indexReader of the user index (can be null see field param)
+     * @param ir the indexReader of the user index (can be null see field param)
      * @param field String the field of the user index: if field is not null ,the suggest
      * words are restricted to the words present in this field.
      * @param morePopular boolean return only the suggest words that are more frequent than the searched word
@@ -214,7 +213,7 @@
     private static void add (BooleanQuery q, String k, String v, float boost) {
         Query tq=new TermQuery(new Term(k, v));
         tq.setBoost(boost);
-        q.add(new BooleanClause(tq, false, false));
+        q.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
     }
 
 
@@ -222,7 +221,7 @@
      * Add a clause to a boolean query.
      */
     private static void add (BooleanQuery q, String k, String v) {
-        q.add(new BooleanClause(new TermQuery(new Term(k, v)), false, false));
+        q.add(new BooleanClause(new TermQuery(new Term(k, v)), BooleanClause.Occur.SHOULD));
     }
 
 
@@ -269,12 +268,10 @@
      * @throws IOException
      */
     public void indexDictionnary (Dictionary dict) throws IOException {
-
-        int ng1, ng2;
         IndexReader.unlock(spellindex);
         IndexWriter writer=new IndexWriter(spellindex, new WhitespaceAnalyzer(), !IndexReader.indexExists(spellindex));
-        writer.mergeFactor=300;
-        writer.minMergeDocs=150;
+        writer.setMergeFactor(300);
+        writer.setMaxBufferedDocs(150);
 
         Iterator iter=dict.getWordsIterator();
         while (iter.hasNext()) {
@@ -328,7 +325,7 @@
 
     private static Document createDocument (String text, int ng1, int ng2) {
         Document doc=new Document();
-        doc.add(Field.Keyword(F_WORD, text)); // orig term
+        doc.add(new Field(F_WORD, text, Field.Store.YES, Field.Index.UN_TOKENIZED)); // orig term
         addGram(text, doc, ng1, ng2);
         return doc;
     }
@@ -341,14 +338,14 @@
             String end=null;
             for (int i=0; i<len-ng+1; i++) {
                 String gram=text.substring(i, i+ng);
-                doc.add(Field.Keyword(key, gram));
+                doc.add(new Field(key, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
                 if (i==0) {
-                    doc.add(Field.Keyword("start"+ng, gram));
+                    doc.add(new Field("start"+ng, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
                 }
                 end=gram;
             }
             if (end!=null) { // may not be present if len==ng1
-                doc.add(Field.Keyword("end"+ng, end));
+                doc.add(new Field("end"+ng, end, Field.Store.YES, Field.Index.UN_TOKENIZED));
             }
         }
     }

Modified: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?view=diff&r1=160986&r2=160987
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java Mon Apr 11 16:48:02 2005
@@ -2,7 +2,6 @@
 
 
 import junit.framework.*;
-import org.apache.lucene.search.spell.*;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.analysis.SimpleAnalyzer;
@@ -10,113 +9,108 @@
 import org.apache.lucene.util.English;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
+
 import java.io.IOException;
-import org.apache.lucene.store.FSDirectory;
+
 import org.apache.lucene.store.Directory;
-import java.io.File;
 
 
 /**
  * Test case
+ *
  * @author Nicolas Maisonneuve
  */
-
-public class TestSpellChecker
-extends TestCase {
-    private SpellChecker spellChecker;
-    Directory userindex, spellindex;
-
-    protected void setUp () throws Exception {
-        super.setUp();
-
-        //create a user index
-        userindex=new RAMDirectory();
-        IndexWriter writer=new IndexWriter(userindex, new SimpleAnalyzer(), true);
-
-        for (int i=0; i<1000; i++) {
-            Document doc=new Document();
-            doc.add(Field.Text("field1", English.intToEnglish(i)));
-            doc.add(Field.Text("field2", English.intToEnglish(i+1))); // + word thousand
-            writer.addDocument(doc);
-        }
-        writer.close();
-
-        // create the spellChecker
-        File file=new File("d://test");
-        spellindex=FSDirectory.getDirectory(file, true);
-        spellChecker=new SpellChecker(spellindex);
+public class TestSpellChecker extends TestCase {
+  private SpellChecker spellChecker;
+  Directory userindex, spellindex;
+
+  protected void setUp() throws Exception {
+    super.setUp();
+
+    //create a user index
+    userindex = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);
+
+    for (int i = 0; i < 1000; i++) {
+      Document doc = new Document();
+      doc.add(new Field("field1", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
+      doc.add(new Field("field2", English.intToEnglish(i + 1), Field.Store.YES, Field.Index.TOKENIZED)); // + word thousand
+      writer.addDocument(doc);
     }
+    writer.close();
 
+    // create the spellChecker
+    spellindex = new RAMDirectory();
+    spellChecker = new SpellChecker(spellindex);
+  }
 
-    public void testBuild () {
-        try {
-            IndexReader r=IndexReader.open(userindex);
 
-            spellChecker.clearIndex();
+  public void testBuild() {
+    try {
+      IndexReader r = IndexReader.open(userindex);
 
-            addwords(r, "field1");
-            int num_field1=this.numdoc();
+      spellChecker.clearIndex();
 
-            addwords(r, "field2");
-            int num_field2=this.numdoc();
+      addwords(r, "field1");
+      int num_field1 = this.numdoc();
 
-            this.assertTrue(num_field2==num_field1+1);
+      addwords(r, "field2");
+      int num_field2 = this.numdoc();
 
-            // test small word
-            String[] l=spellChecker.suggestSimilar("fvie", 2);
-            this.assertTrue(l[0].equals("five"));
+      assertEquals(num_field2, num_field1 + 1);
 
-            l=spellChecker.suggestSimilar("fiv", 2);
-            this.assertTrue(l[0].equals("five"));
+      // test small word
+      String[] similar = spellChecker.suggestSimilar("fvie", 2);
+      assertEquals(similar[0], "five");
 
-            l=spellChecker.suggestSimilar("ive", 2);
-            this.assertTrue(l[0].equals("five"));
+      similar = spellChecker.suggestSimilar("fiv", 2);
+      assertEquals(similar[0], "five");
 
-            l=spellChecker.suggestSimilar("fives", 2);
-            this.assertTrue(l[0].equals("five"));
+      similar = spellChecker.suggestSimilar("ive", 2);
+      assertEquals(similar[0], "five");
 
-            l=spellChecker.suggestSimilar("fie", 2);
-            this.assertTrue(l[0].equals("five"));
+      similar = spellChecker.suggestSimilar("fives", 2);
+      assertEquals(similar[0], "five");
 
-            l=spellChecker.suggestSimilar("fi", 2);
-            this.assertEquals(0,l.length);
+      similar = spellChecker.suggestSimilar("fie", 2);
+      assertEquals(similar[0], "five");
 
-            // test restreint to a field
-            l=spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
-            this.assertEquals(0,l.length); // there isn't the term thousand in the field field1
+      similar = spellChecker.suggestSimilar("fi", 2);
+      assertEquals(0, similar.length);
 
-            l=spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
-            this.assertEquals(1,l.length); // there is the term thousand in the field field2
-        }
-        catch (IOException e) {
-            e.printStackTrace();
-            this.assertTrue(false);
-        }
+      // test restraint to a field
+      similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
+      assertEquals(0, similar.length); // there isn't the term thousand in the field field1
 
+      similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
+      assertEquals(1, similar.length); // there is the term thousand in the field field2
+    } catch (IOException e) {
+      e.printStackTrace();
+      fail();
     }
 
-
-    private void addwords (IndexReader r, String field) throws IOException {
-        long time=System.currentTimeMillis();
-        spellChecker.indexDictionnary(new LuceneDictionary(r, field));
-        time=System.currentTimeMillis()-time;
-        System.out.println("time to build "+field+": "+time);
-    }
+  }
 
 
-    private int numdoc () throws IOException {
-        IndexReader rs=IndexReader.open(spellindex);
-        int num=rs.numDocs();
-        this.assertTrue(num!=0);
-        System.out.println("num docs: "+num);
-        rs.close();
-        return num;
-    }
-
-
-    protected void tearDown () throws Exception {
-        spellChecker=null;
-        super.tearDown();
-    }
+  private void addwords(IndexReader r, String field) throws IOException {
+    long time = System.currentTimeMillis();
+    spellChecker.indexDictionnary(new LuceneDictionary(r, field));
+    time = System.currentTimeMillis() - time;
+    System.out.println("time to build " + field + ": " + time);
+  }
+
+  private int numdoc() throws IOException {
+    IndexReader rs = IndexReader.open(spellindex);
+    int num = rs.numDocs();
+    assertTrue(num != 0);
+    System.out.println("num docs: " + num);
+    rs.close();
+    return num;
+  }
+
+  protected void tearDown() throws Exception {
+    spellChecker = null;
+    super.tearDown();
+  }
 
 }