You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by eh...@apache.org on 2004/03/10 01:18:02 UTC

cvs commit: jakarta-lucene/src/java/org/apache/lucene/analysis/de WordlistLoader.java

ehatcher    2004/03/09 16:18:02

  Modified:    src/java/org/apache/lucene/analysis StopFilter.java
               src/java/org/apache/lucene/analysis/de WordlistLoader.java
  Log:
  convert Hashtable to Set, to avoid unnecessary synchronization issues
  
  Revision  Changes    Path
  1.7       +51 -12    jakarta-lucene/src/java/org/apache/lucene/analysis/StopFilter.java
  
  Index: StopFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/StopFilter.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- StopFilter.java	5 Dec 2003 14:30:12 -0000	1.6
  +++ StopFilter.java	10 Mar 2004 00:18:02 -0000	1.7
  @@ -55,31 +55,55 @@
    */
   
   import java.io.IOException;
  +import java.util.HashSet;
   import java.util.Hashtable;
  +import java.util.Set;
   
  -/** Removes stop words from a token stream. */
  +/**
  + * Removes stop words from a token stream.
  + */
   
   public final class StopFilter extends TokenFilter {
   
  -  private Hashtable table;
  +  private Set table;
   
  -  /** Constructs a filter which removes words from the input
  -   TokenStream that are named in the array of words. */
  +  /**
  +   * Constructs a filter which removes words from the input
  +   * TokenStream that are named in the array of words.
  +   */
     public StopFilter(TokenStream in, String[] stopWords) {
       super(in);
  -    table = makeStopTable(stopWords);
  +    table = makeStopSet(stopWords);
     }
   
  -  /** Constructs a filter which removes words from the input
  -   TokenStream that are named in the Hashtable. */
  +  /**
  +   * Constructs a filter which removes words from the input
  +   * TokenStream that are named in the Hashtable.
  +   *
  +   * @deprecated Use {@link #StopFilter(TokenStream, Set)} instead
  +   */
     public StopFilter(TokenStream in, Hashtable stopTable) {
       super(in);
  +    table = stopTable.keySet();
  +  }
  +
  +  /**
  +   * Constructs a filter which removes words from the input
  +   * TokenStream that are named in the Set.
  +   */
  +  public StopFilter(TokenStream in, Set stopTable) {
  +    super(in);
       table = stopTable;
     }
   
  -  /** Builds a Hashtable from an array of stop words, appropriate for passing
  -   into the StopFilter constructor.  This permits this table construction to
  -   be cached once when an Analyzer is constructed. */
  +  /**
  +   * Builds a Hashtable from an array of stop words,
  +   * appropriate for passing into the StopFilter constructor.
  +   * This permits this table construction to be cached once when
  +   * an Analyzer is constructed.
  +   *
  +   * @deprecated Use {@link #makeStopSet(String[])} instead.
  +   */
     public static final Hashtable makeStopTable(String[] stopWords) {
       Hashtable stopTable = new Hashtable(stopWords.length);
       for (int i = 0; i < stopWords.length; i++)
  @@ -87,11 +111,26 @@
       return stopTable;
     }
   
  -  /** Returns the next input Token whose termText() is not a stop word. */
  +  /**
  +   * Builds a Set from an array of stop words,
  +   * appropriate for passing into the StopFilter constructor.
  +   * This permits this table construction to be cached once when
  +   * an Analyzer is constructed.
  +   */
  +  public static final Set makeStopSet(String[] stopWords) {
  +    Set stopTable = new HashSet(stopWords.length);
  +    for (int i = 0; i < stopWords.length; i++)
  +      stopTable.add(stopWords[i]);
  +    return stopTable;
  +  }
  +
  +  /**
  +   * Returns the next input Token whose termText() is not a stop word.
  +   */
     public final Token next() throws IOException {
       // return the first non-stop word found
       for (Token token = input.next(); token != null; token = input.next())
  -      if (table.get(token.termText) == null)
  +      if (!table.contains(token.termText))
           return token;
       // reached EOS -- return null
       return null;
  
  
  
  1.5       +60 -61    jakarta-lucene/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
  
  Index: WordlistLoader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/analysis/de/WordlistLoader.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- WordlistLoader.java	18 Aug 2002 17:33:16 -0000	1.4
  +++ WordlistLoader.java	10 Mar 2004 00:18:02 -0000	1.5
  @@ -68,71 +68,70 @@
    * @author    Gerhard Schwarz
    * @version   $Id$
    */
  -public class WordlistLoader
  -{
  -    /**
  -     * @param path      Path to the wordlist
  -     * @param wordfile  Name of the wordlist
  -     */
  -    public static Hashtable getWordtable( String path, String wordfile ) {
  -	if ( path == null || wordfile == null ) {
  -	    return new Hashtable();
  -	}
  -	return getWordtable( new File( path, wordfile ) );
  +public class WordlistLoader {
  +  /**
  +   * @param path      Path to the wordlist
  +   * @param wordfile  Name of the wordlist
  +   */
  +  public static Hashtable getWordtable(String path, String wordfile) {
  +    if (path == null || wordfile == null) {
  +      return new Hashtable();
       }
  +    return getWordtable(new File(path, wordfile));
  +  }
   
  -    /**
  -     * @param wordfile  Complete path to the wordlist
  -     */
  -    public static Hashtable getWordtable( String wordfile ) {
  -	if ( wordfile == null ) {
  -	    return new Hashtable();
  -	}
  -	return getWordtable( new File( wordfile ) );
  +  /**
  +   * @param wordfile  Complete path to the wordlist
  +   */
  +  public static Hashtable getWordtable(String wordfile) {
  +    if (wordfile == null) {
  +      return new Hashtable();
       }
  +    return getWordtable(new File(wordfile));
  +  }
   
  -    /**
  -     * @param wordfile  File containing the wordlist
  -     */
  -    public static Hashtable getWordtable( File wordfile ) {
  -	if ( wordfile == null ) {
  -	    return new Hashtable();
  -	}
  -	Hashtable result = null;
  -	try {
  -	    LineNumberReader lnr = new LineNumberReader( new FileReader( wordfile ) );
  -	    String word = null;
  -	    String[] stopwords = new String[100];
  -	    int wordcount = 0;
  -	    while ( ( word = lnr.readLine() ) != null ) {
  -		wordcount++;
  -		if ( wordcount == stopwords.length ) {
  -		    String[] tmp = new String[stopwords.length + 50];
  -		    System.arraycopy( stopwords, 0, tmp, 0, wordcount );
  -		    stopwords = tmp;
  -		}
  -		stopwords[wordcount-1] = word;
  -	    }
  -	    result = makeWordTable( stopwords, wordcount );
  -	}
  -	// On error, use an empty table
  -	catch ( IOException e ) {
  -	    result = new Hashtable();
  -	}
  -	return result;
  +  /**
  +   * @param wordfile  File containing the wordlist
  +   */
  +  public static Hashtable getWordtable(File wordfile) {
  +    if (wordfile == null) {
  +      return new Hashtable();
       }
  +    Hashtable result = null;
  +    try {
  +      LineNumberReader lnr = new LineNumberReader(new FileReader(wordfile));
  +      String word = null;
  +      String[] stopwords = new String[100];
  +      int wordcount = 0;
  +      while ((word = lnr.readLine()) != null) {
  +        wordcount++;
  +        if (wordcount == stopwords.length) {
  +          String[] tmp = new String[stopwords.length + 50];
  +          System.arraycopy(stopwords, 0, tmp, 0, wordcount);
  +          stopwords = tmp;
  +        }
  +        stopwords[wordcount - 1] = word;
  +      }
  +      result = makeWordTable(stopwords, wordcount);
  +    }
  +// On error, use an empty table
  +    catch (IOException e) {
  +      result = new Hashtable();
  +    }
  +    return result;
  +  }
   
  -    /**
  -     * Builds the wordlist table.
  -     *
  -     * @param words   Word that where read
  -     * @param length  Amount of words that where read into <tt>words</tt>
  -     */
  -    private static Hashtable makeWordTable( String[] words, int length ) {
  -	Hashtable table = new Hashtable( length );
  -	for ( int i = 0; i < length; i++ ) {
  -	    table.put( words[i], words[i] );
  -	}
  -	return table;
  +  /**
  +   * Builds the wordlist table.
  +   *
  +   * @param words   Words that were read
  +   * @param length  Number of words that were read into <tt>words</tt>
  +   */
  +  private static Hashtable makeWordTable(String[] words, int length) {
  +    Hashtable table = new Hashtable(length);
  +    for (int i = 0; i < length; i++) {
  +      table.put(words[i], words[i]);
       }
  +    return table;
  +  }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org