You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by eh...@apache.org on 2004/03/29 18:53:36 UTC

cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java WordlistLoader.java

ehatcher    2004/03/29 08:53:35

  Modified:    src/java/org/apache/lucene/analysis/de GermanAnalyzer.java
                        WordlistLoader.java
  Log:
  #27987 - add exceptions to WordlistLoader to missing file causes error instead of silently failing
  
  Revision  Changes    Path
  1.10      +6 -5      jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
  
  Index: GermanAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- GermanAnalyzer.java	12 Mar 2004 09:45:17 -0000	1.9
  +++ GermanAnalyzer.java	29 Mar 2004 16:53:35 -0000	1.10
  @@ -62,6 +62,7 @@
   
   import java.io.File;
   import java.io.Reader;
  +import java.io.IOException;
   import java.util.HashSet;
   import java.util.Hashtable;
   import java.util.Set;
  @@ -82,14 +83,14 @@
      */
     private String[] GERMAN_STOP_WORDS = {
       "einer", "eine", "eines", "einem", "einen",
  -    "der", "die", "das", "dass", "daß",
  +    "der", "die", "das", "dass", "da�",
       "du", "er", "sie", "es",
       "was", "wer", "wie", "wir",
       "und", "oder", "ohne", "mit",
       "am", "im", "in", "aus", "auf",
       "ist", "sein", "war", "wird",
       "ihr", "ihre", "ihres",
  -    "als", "für", "von", "mit",
  +    "als", "f�r", "von", "mit",
       "dich", "dir", "mich", "mir",
       "mein", "sein", "kein",
       "durch", "wegen", "wird"
  @@ -129,7 +130,7 @@
     /**
      * Builds an analyzer with the given stop words.
      */
  -  public GermanAnalyzer(File stopwords) {
  +  public GermanAnalyzer(File stopwords) throws IOException {
       stopSet = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());
     }
   
  @@ -150,7 +151,7 @@
     /**
      * Builds an exclusionlist from the words contained in the given file.
      */
  -  public void setStemExclusionTable(File exclusionlist) {
  +  public void setStemExclusionTable(File exclusionlist) throws IOException {
       exclusionSet = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());
     }
   
  
  
  
  1.7       +21 -19    jakarta-lucene/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
  
  Index: WordlistLoader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/WordlistLoader.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- WordlistLoader.java	12 Mar 2004 09:43:48 -0000	1.6
  +++ WordlistLoader.java	29 Mar 2004 16:53:35 -0000	1.7
  @@ -62,20 +62,18 @@
   
   /**
    * Loads a text file and adds every line as an entry to a Hashtable. Every line
  - * should contain only one word. If the file is not found or on any error, an
  - * empty table is returned.
  - *
  - * @author    Gerhard Schwarz
  - * @version   $Id$
  + * should contain only one word.
    *
  + * @author Gerhard Schwarz
  + * @version $Id$
    * @todo refactor to convert to Sets instead of Hashtable
    */
   public class WordlistLoader {
     /**
  -   * @param path      Path to the wordlist
  -   * @param wordfile  Name of the wordlist
  +   * @param path     Path to the wordlist
  +   * @param wordfile Name of the wordlist
      */
  -  public static Hashtable getWordtable(String path, String wordfile) {
  +  public static Hashtable getWordtable(String path, String wordfile) throws IOException {
       if (path == null || wordfile == null) {
         return new Hashtable();
       }
  @@ -83,9 +81,9 @@
     }
   
     /**
  -   * @param wordfile  Complete path to the wordlist
  +   * @param wordfile Complete path to the wordlist
      */
  -  public static Hashtable getWordtable(String wordfile) {
  +  public static Hashtable getWordtable(String wordfile) throws IOException {
       if (wordfile == null) {
         return new Hashtable();
       }
  @@ -93,16 +91,19 @@
     }
   
     /**
  -   * @param wordfile  File containing the wordlist
  +   * @param wordfile File containing the wordlist
      * @todo Create a Set version of this method
      */
  -  public static Hashtable getWordtable(File wordfile) {
  +  public static Hashtable getWordtable(File wordfile) throws IOException {
       if (wordfile == null) {
         return new Hashtable();
       }
       Hashtable result = null;
  +    FileReader freader = null;
  +    LineNumberReader lnr = null;
       try {
  -      LineNumberReader lnr = new LineNumberReader(new FileReader(wordfile));
  +      freader = new FileReader(wordfile);
  +      lnr = new LineNumberReader(freader);
         String word = null;
         String[] stopwords = new String[100];
         int wordcount = 0;
  @@ -116,10 +117,11 @@
           stopwords[wordcount - 1] = word;
         }
         result = makeWordTable(stopwords, wordcount);
  -    }
  -// On error, use an empty table
  -    catch (IOException e) {
  -      result = new Hashtable();
  +    } finally {
  +      if (lnr != null)
  +        lnr.close();
  +      if (freader != null)
  +        freader.close();
       }
       return result;
     }
  @@ -127,8 +129,8 @@
     /**
      * Builds the wordlist table.
      *
  -   * @param words   Word that where read
  -   * @param length  Amount of words that where read into <tt>words</tt>
  +   * @param words  Word that where read
  +   * @param length Amount of words that where read into <tt>words</tt>
      */
     private static Hashtable makeWordTable(String[] words, int length) {
       Hashtable table = new Hashtable(length);
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java WordlistLoader.java

Posted by Erik Hatcher <er...@ehatchersolutions.com>.
Yeah, I just noticed the commit diffs myself before reading your  
message.  Oops.... I'll fix it tonight.

	Erik

On Mar 29, 2004, at 1:46 PM, Otis Gospodnetic wrote:

> Erik, check the changes below.  The 'sharp s' and the umlauts seem to
> have changed.  Maybe something related to your recent IDEA file
> encoding changes.
>
> Otis
>
> --- ehatcher@apache.org wrote:
>> ehatcher    2004/03/29 08:53:35
>>
>>   Modified:    src/java/org/apache/lucene/analysis/de
>> GermanAnalyzer.java
>>                         WordlistLoader.java
>>   Log:
>>   #27987 - add exceptions to WordlistLoader to missing file causes
>> error instead of silently failing
>>
>>   Revision  Changes    Path
>>   1.10      +6 -5
>>
> jakarta-lucene/src/java/org/apache/lucene/analysis/de/ 
> GermanAnalyzer.java
>>
>>   Index: GermanAnalyzer.java
>>   ===================================================================
>>   RCS file:
>>
> /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/ 
> GermanAnalyzer.java,v
>>   retrieving revision 1.9
>>   retrieving revision 1.10
>>   diff -u -r1.9 -r1.10
>>   --- GermanAnalyzer.java	12 Mar 2004 09:45:17 -0000	1.9
>>   +++ GermanAnalyzer.java	29 Mar 2004 16:53:35 -0000	1.10
>>   @@ -62,6 +62,7 @@
>>
>>    import java.io.File;
>>    import java.io.Reader;
>>   +import java.io.IOException;
>>    import java.util.HashSet;
>>    import java.util.Hashtable;
>>    import java.util.Set;
>>   @@ -82,14 +83,14 @@
>>       */
>>      private String[] GERMAN_STOP_WORDS = {
>>        "einer", "eine", "eines", "einem", "einen",
>>   -    "der", "die", "das", "dass", "daß",
>>   +    "der", "die", "das", "dass", "da�",
>>        "du", "er", "sie", "es",
>>        "was", "wer", "wie", "wir",
>>        "und", "oder", "ohne", "mit",
>>        "am", "im", "in", "aus", "auf",
>>        "ist", "sein", "war", "wird",
>>        "ihr", "ihre", "ihres",
>>   -    "als", "für", "von", "mit",
>>   +    "als", "f�r", "von", "mit",
>>        "dich", "dir", "mich", "mir",
>>        "mein", "sein", "kein",
>>        "durch", "wegen", "wird"
>>   @@ -129,7 +130,7 @@
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de GermanAnalyzer.java WordlistLoader.java

Posted by Otis Gospodnetic <ot...@yahoo.com>.
Erik, check the changes below.  The 'sharp s' and the umlauts seem to
have changed.  Maybe something related to your recent IDEA file
encoding changes.

Otis

--- ehatcher@apache.org wrote:
> ehatcher    2004/03/29 08:53:35
> 
>   Modified:    src/java/org/apache/lucene/analysis/de
> GermanAnalyzer.java
>                         WordlistLoader.java
>   Log:
>   #27987 - add exceptions to WordlistLoader to missing file causes
> error instead of silently failing
>   
>   Revision  Changes    Path
>   1.10      +6 -5     
>
jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
>   
>   Index: GermanAnalyzer.java
>   ===================================================================
>   RCS file:
>
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java,v
>   retrieving revision 1.9
>   retrieving revision 1.10
>   diff -u -r1.9 -r1.10
>   --- GermanAnalyzer.java	12 Mar 2004 09:45:17 -0000	1.9
>   +++ GermanAnalyzer.java	29 Mar 2004 16:53:35 -0000	1.10
>   @@ -62,6 +62,7 @@
>    
>    import java.io.File;
>    import java.io.Reader;
>   +import java.io.IOException;
>    import java.util.HashSet;
>    import java.util.Hashtable;
>    import java.util.Set;
>   @@ -82,14 +83,14 @@
>       */
>      private String[] GERMAN_STOP_WORDS = {
>        "einer", "eine", "eines", "einem", "einen",
>   -    "der", "die", "das", "dass", "daß",
>   +    "der", "die", "das", "dass", "da�",
>        "du", "er", "sie", "es",
>        "was", "wer", "wie", "wir",
>        "und", "oder", "ohne", "mit",
>        "am", "im", "in", "aus", "auf",
>        "ist", "sein", "war", "wird",
>        "ihr", "ihre", "ihres",
>   -    "als", "für", "von", "mit",
>   +    "als", "f�r", "von", "mit",
>        "dich", "dir", "mich", "mir",
>        "mein", "sein", "kein",
>        "durch", "wegen", "wird"
>   @@ -129,7 +130,7 @@


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org