You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by eh...@apache.org on 2004/03/29 18:53:36 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java WordlistLoader.java
ehatcher 2004/03/29 08:53:35
Modified: src/java/org/apache/lucene/analysis/de GermanAnalyzer.java
WordlistLoader.java
Log:
#27987 - add exceptions to WordlistLoader so missing file causes error instead of silently failing
Revision Changes Path
1.10 +6 -5 jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
Index: GermanAnalyzer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- GermanAnalyzer.java 12 Mar 2004 09:45:17 -0000 1.9
+++ GermanAnalyzer.java 29 Mar 2004 16:53:35 -0000 1.10
@@ -62,6 +62,7 @@
import java.io.File;
import java.io.Reader;
+import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
@@ -82,14 +83,14 @@
*/
private String[] GERMAN_STOP_WORDS = {
"einer", "eine", "eines", "einem", "einen",
- "der", "die", "das", "dass", "daß",
+ "der", "die", "das", "dass", "da�",
"du", "er", "sie", "es",
"was", "wer", "wie", "wir",
"und", "oder", "ohne", "mit",
"am", "im", "in", "aus", "auf",
"ist", "sein", "war", "wird",
"ihr", "ihre", "ihres",
- "als", "für", "von", "mit",
+ "als", "f�r", "von", "mit",
"dich", "dir", "mich", "mir",
"mein", "sein", "kein",
"durch", "wegen", "wird"
@@ -129,7 +130,7 @@
/**
* Builds an analyzer with the given stop words.
*/
- public GermanAnalyzer(File stopwords) {
+ public GermanAnalyzer(File stopwords) throws IOException {
stopSet = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());
}
@@ -150,7 +151,7 @@
/**
* Builds an exclusionlist from the words contained in the given file.
*/
- public void setStemExclusionTable(File exclusionlist) {
+ public void setStemExclusionTable(File exclusionlist) throws IOException {
exclusionSet = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());
}
1.7 +21 -19 jakarta-lucene/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
Index: WordlistLoader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/WordlistLoader.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- WordlistLoader.java 12 Mar 2004 09:43:48 -0000 1.6
+++ WordlistLoader.java 29 Mar 2004 16:53:35 -0000 1.7
@@ -62,20 +62,18 @@
/**
* Loads a text file and adds every line as an entry to a Hashtable. Every line
- * should contain only one word. If the file is not found or on any error, an
- * empty table is returned.
- *
- * @author Gerhard Schwarz
- * @version $Id$
+ * should contain only one word.
*
+ * @author Gerhard Schwarz
+ * @version $Id$
* @todo refactor to convert to Sets instead of Hashtable
*/
public class WordlistLoader {
/**
- * @param path Path to the wordlist
- * @param wordfile Name of the wordlist
+ * @param path Path to the wordlist
+ * @param wordfile Name of the wordlist
*/
- public static Hashtable getWordtable(String path, String wordfile) {
+ public static Hashtable getWordtable(String path, String wordfile) throws IOException {
if (path == null || wordfile == null) {
return new Hashtable();
}
@@ -83,9 +81,9 @@
}
/**
- * @param wordfile Complete path to the wordlist
+ * @param wordfile Complete path to the wordlist
*/
- public static Hashtable getWordtable(String wordfile) {
+ public static Hashtable getWordtable(String wordfile) throws IOException {
if (wordfile == null) {
return new Hashtable();
}
@@ -93,16 +91,19 @@
}
/**
- * @param wordfile File containing the wordlist
+ * @param wordfile File containing the wordlist
* @todo Create a Set version of this method
*/
- public static Hashtable getWordtable(File wordfile) {
+ public static Hashtable getWordtable(File wordfile) throws IOException {
if (wordfile == null) {
return new Hashtable();
}
Hashtable result = null;
+ FileReader freader = null;
+ LineNumberReader lnr = null;
try {
- LineNumberReader lnr = new LineNumberReader(new FileReader(wordfile));
+ freader = new FileReader(wordfile);
+ lnr = new LineNumberReader(freader);
String word = null;
String[] stopwords = new String[100];
int wordcount = 0;
@@ -116,10 +117,11 @@
stopwords[wordcount - 1] = word;
}
result = makeWordTable(stopwords, wordcount);
- }
-// On error, use an empty table
- catch (IOException e) {
- result = new Hashtable();
+ } finally {
+ if (lnr != null)
+ lnr.close();
+ if (freader != null)
+ freader.close();
}
return result;
}
@@ -127,8 +129,8 @@
/**
* Builds the wordlist table.
*
- * @param words Word that where read
- * @param length Amount of words that where read into <tt>words</tt>
+ * @param words Word that where read
+ * @param length Amount of words that where read into <tt>words</tt>
*/
private static Hashtable makeWordTable(String[] words, int length) {
Hashtable table = new Hashtable(length);
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java WordlistLoader.java
Posted by Erik Hatcher <er...@ehatchersolutions.com>.
Yeah, I just noticed the commit diffs myself before reading your
message. Oops.... I'll fix it tonight.
Erik
On Mar 29, 2004, at 1:46 PM, Otis Gospodnetic wrote:
> Erik, check the changes below. The 'sharp s' and the umlauts seem to
> have changed. Maybe something related to your recent IDEA file
> encoding changes.
>
> Otis
>
> --- ehatcher@apache.org wrote:
>> ehatcher 2004/03/29 08:53:35
>>
>> Modified: src/java/org/apache/lucene/analysis/de
>> GermanAnalyzer.java
>> WordlistLoader.java
>> Log:
>> #27987 - add exceptions to WordlistLoader so missing file causes
>> error instead of silently failing
>>
>> Revision Changes Path
>> 1.10 +6 -5
>>
> jakarta-lucene/src/java/org/apache/lucene/analysis/de/
> GermanAnalyzer.java
>>
>> Index: GermanAnalyzer.java
>> ===================================================================
>> RCS file:
>>
> /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/
> GermanAnalyzer.java,v
>> retrieving revision 1.9
>> retrieving revision 1.10
>> diff -u -r1.9 -r1.10
>> --- GermanAnalyzer.java 12 Mar 2004 09:45:17 -0000 1.9
>> +++ GermanAnalyzer.java 29 Mar 2004 16:53:35 -0000 1.10
>> @@ -62,6 +62,7 @@
>>
>> import java.io.File;
>> import java.io.Reader;
>> +import java.io.IOException;
>> import java.util.HashSet;
>> import java.util.Hashtable;
>> import java.util.Set;
>> @@ -82,14 +83,14 @@
>> */
>> private String[] GERMAN_STOP_WORDS = {
>> "einer", "eine", "eines", "einem", "einen",
>> - "der", "die", "das", "dass", "daß",
>> + "der", "die", "das", "dass", "da�",
>> "du", "er", "sie", "es",
>> "was", "wer", "wie", "wir",
>> "und", "oder", "ohne", "mit",
>> "am", "im", "in", "aus", "auf",
>> "ist", "sein", "war", "wird",
>> "ihr", "ihre", "ihres",
>> - "als", "für", "von", "mit",
>> + "als", "f�r", "von", "mit",
>> "dich", "dir", "mich", "mir",
>> "mein", "sein", "kein",
>> "durch", "wegen", "wird"
>> @@ -129,7 +130,7 @@
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java WordlistLoader.java
Posted by Otis Gospodnetic <ot...@yahoo.com>.
Erik, check the changes below. The 'sharp s' and the umlauts seem to
have changed. Maybe something related to your recent IDEA file
encoding changes.
Otis
--- ehatcher@apache.org wrote:
> ehatcher 2004/03/29 08:53:35
>
> Modified: src/java/org/apache/lucene/analysis/de
> GermanAnalyzer.java
> WordlistLoader.java
> Log:
> #27987 - add exceptions to WordlistLoader so missing file causes
> error instead of silently failing
>
> Revision Changes Path
> 1.10 +6 -5
>
jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
>
> Index: GermanAnalyzer.java
> ===================================================================
> RCS file:
>
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java,v
> retrieving revision 1.9
> retrieving revision 1.10
> diff -u -r1.9 -r1.10
> --- GermanAnalyzer.java 12 Mar 2004 09:45:17 -0000 1.9
> +++ GermanAnalyzer.java 29 Mar 2004 16:53:35 -0000 1.10
> @@ -62,6 +62,7 @@
>
> import java.io.File;
> import java.io.Reader;
> +import java.io.IOException;
> import java.util.HashSet;
> import java.util.Hashtable;
> import java.util.Set;
> @@ -82,14 +83,14 @@
> */
> private String[] GERMAN_STOP_WORDS = {
> "einer", "eine", "eines", "einem", "einen",
> - "der", "die", "das", "dass", "daß",
> + "der", "die", "das", "dass", "da�",
> "du", "er", "sie", "es",
> "was", "wer", "wie", "wir",
> "und", "oder", "ohne", "mit",
> "am", "im", "in", "aus", "auf",
> "ist", "sein", "war", "wird",
> "ihr", "ihre", "ihres",
> - "als", "für", "von", "mit",
> + "als", "f�r", "von", "mit",
> "dich", "dir", "mich", "mir",
> "mein", "sein", "kein",
> "durch", "wegen", "wird"
> @@ -129,7 +130,7 @@
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org