You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2005/06/22 04:31:34 UTC

svn commit: r191754 - in /lucene/java/trunk/src/java/org/apache/lucene/analysis: WordlistLoader.java standard/StandardAnalyzer.java

Author: otis
Date: Tue Jun 21 19:31:33 2005
New Revision: 191754

URL: http://svn.apache.org/viewcvs?rev=191754&view=rev
Log:
- Applied patch to src/java/org/apache/lucene/analysis/WordlistLoader.java
  (adds reading a word list from a Reader)

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java?rev=191754&r1=191753&r2=191754&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java Tue Jun 21 19:31:33 2005
@@ -19,7 +19,8 @@
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
-import java.io.LineNumberReader;
+import java.io.Reader;
+import java.io.BufferedReader;
 import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Iterator;
@@ -34,30 +35,44 @@
 
   /**
    * Loads a text file and adds every line as an entry to a HashSet (omitting
-   * leading and trailing whitespace). Every line of the file should contain only 
+   * leading and trailing whitespace). Every line of the file should contain only
    * one word. The words need to be in lowercase if you make use of an
    * Analyzer which uses LowerCaseFilter (like GermanAnalyzer).
-   * 
+   *
    * @param wordfile File containing the wordlist
    * @return A HashSet with the file's words
    */
   public static HashSet getWordSet(File wordfile) throws IOException {
     HashSet result = new HashSet();
-    FileReader freader = null;
-    LineNumberReader lnr = null;
+    FileReader reader = null;
     try {
-      freader = new FileReader(wordfile);
-      lnr = new LineNumberReader(freader);
+      reader = new FileReader(wordfile);
+      result = getWordSet(reader);
+    }
+    finally {
+      if (reader != null)
+        reader.close();
+    }
+    return result;
+  }
+
+  public static HashSet getWordSet(Reader reader) throws IOException {
+    HashSet result = new HashSet();
+    BufferedReader br = null;
+    try {
+      if (reader instanceof BufferedReader) {
+        br = (BufferedReader) reader;
+      } else {
+        br = new BufferedReader(reader);
+      }
       String word = null;
-      while ((word = lnr.readLine()) != null) {
+      while ((word = br.readLine()) != null) {
         result.add(word.trim());
       }
     }
     finally {
-      if (lnr != null)
-        lnr.close();
-      if (freader != null)
-        freader.close();
+      if (br != null)
+        br.close();
     }
     return result;
   }
@@ -65,7 +80,7 @@
   /**
    * @param path      Path to the wordlist
    * @param wordfile  Name of the wordlist
-   * 
+   *
    * @deprecated Use {@link #getWordSet(File)} instead
    */
   public static Hashtable getWordtable(String path, String wordfile) throws IOException {
@@ -74,7 +89,7 @@
 
   /**
    * @param wordfile  Complete path to the wordlist
-   * 
+   *
    * @deprecated Use {@link #getWordSet(File)} instead
    */
   public static Hashtable getWordtable(String wordfile) throws IOException {

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=191754&r1=191753&r2=191754&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Jun 21 19:31:33 2005
@@ -51,6 +51,10 @@
     stopSet = WordlistLoader.getWordSet(stopwords);
   }
 
+  public StandardAnalyzer(Reader stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
   /** Constructs a {@link StandardTokenizer} filtered by a {@link
   StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
   public TokenStream tokenStream(String fieldName, Reader reader) {