You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2005/06/22 04:31:34 UTC
svn commit: r191754 - in
/lucene/java/trunk/src/java/org/apache/lucene/analysis: WordlistLoader.java
standard/StandardAnalyzer.java
Author: otis
Date: Tue Jun 21 19:31:33 2005
New Revision: 191754
URL: http://svn.apache.org/viewcvs?rev=191754&view=rev
Log:
- Applied patch to src/java/org/apache/lucene/analysis/WordlistLoader.java
(adds support for reading a word list from a Reader)
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java?rev=191754&r1=191753&r2=191754&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/WordlistLoader.java Tue Jun 21 19:31:33 2005
@@ -19,7 +19,8 @@
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
-import java.io.LineNumberReader;
+import java.io.Reader;
+import java.io.BufferedReader;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
@@ -34,30 +35,44 @@
/**
* Loads a text file and adds every line as an entry to a HashSet (omitting
- * leading and trailing whitespace). Every line of the file should contain only
+ * leading and trailing whitespace). Every line of the file should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like GermanAnalyzer).
- *
+ *
* @param wordfile File containing the wordlist
* @return A HashSet with the file's words
*/
public static HashSet getWordSet(File wordfile) throws IOException {
HashSet result = new HashSet();
- FileReader freader = null;
- LineNumberReader lnr = null;
+ FileReader reader = null;
try {
- freader = new FileReader(wordfile);
- lnr = new LineNumberReader(freader);
+ reader = new FileReader(wordfile);
+ result = getWordSet(reader);
+ }
+ finally {
+ if (reader != null)
+ reader.close();
+ }
+ return result;
+ }
+
+ public static HashSet getWordSet(Reader reader) throws IOException {
+ HashSet result = new HashSet();
+ BufferedReader br = null;
+ try {
+ if (reader instanceof BufferedReader) {
+ br = (BufferedReader) reader;
+ } else {
+ br = new BufferedReader(reader);
+ }
String word = null;
- while ((word = lnr.readLine()) != null) {
+ while ((word = br.readLine()) != null) {
result.add(word.trim());
}
}
finally {
- if (lnr != null)
- lnr.close();
- if (freader != null)
- freader.close();
+ if (br != null)
+ br.close();
}
return result;
}
@@ -65,7 +80,7 @@
/**
* @param path Path to the wordlist
* @param wordfile Name of the wordlist
- *
+ *
* @deprecated Use {@link #getWordSet(File)} instead
*/
public static Hashtable getWordtable(String path, String wordfile) throws IOException {
@@ -74,7 +89,7 @@
/**
* @param wordfile Complete path to the wordlist
- *
+ *
* @deprecated Use {@link #getWordSet(File)} instead
*/
public static Hashtable getWordtable(String wordfile) throws IOException {
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=191754&r1=191753&r2=191754&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Jun 21 19:31:33 2005
@@ -51,6 +51,10 @@
stopSet = WordlistLoader.getWordSet(stopwords);
}
+ public StandardAnalyzer(Reader stopwords) throws IOException {
+ stopSet = WordlistLoader.getWordSet(stopwords);
+ }
+
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
public TokenStream tokenStream(String fieldName, Reader reader) {