You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by dn...@apache.org on 2004/10/17 13:41:41 UTC

cvs commit: jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk CJKAnalyzer.java

dnaber      2004/10/17 04:41:41

  Modified:    contributions/analyzers/src/java/org/apache/lucene/analysis/fr
                        FrenchAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/br
                        BrazilianAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/nl
                        DutchAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/cz
                        CzechAnalyzer.java
               contributions/analyzers/src/java/org/apache/lucene/analysis/cjk
                        CJKAnalyzer.java
  Log:
  improve doc for default constructor; make stop word list public but final
  
  Revision  Changes    Path
  1.9       +3 -3      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
  
  Index: FrenchAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- FrenchAnalyzer.java	17 Aug 2004 21:34:47 -0000	1.8
  +++ FrenchAnalyzer.java	17 Oct 2004 11:41:40 -0000	1.9
  @@ -84,7 +84,7 @@
     /**
      * Extended list of typical french stopwords.
      */
  -  private String[] FRENCH_STOP_WORDS = {
  +  public final static String[] FRENCH_STOP_WORDS = {
       "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
       "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
       "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
  @@ -119,7 +119,7 @@
     private Set excltable = new HashSet();
   
     /**
  -   * Builds an analyzer.
  +   * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
      */
     public FrenchAnalyzer() {
       stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
  
  
  
  1.9       +3 -3      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
  
  Index: BrazilianAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- BrazilianAnalyzer.java	16 Aug 2004 20:42:59 -0000	1.8
  +++ BrazilianAnalyzer.java	17 Oct 2004 11:41:40 -0000	1.9
  @@ -73,14 +73,14 @@
    * will not be indexed at all) and an external list of exclusions (word that will
    * not be stemmed, but indexed).
    *
  - * @author    João Kramer
  + * @author    João Kramer
    */
   public final class BrazilianAnalyzer extends Analyzer {
   
   	/**
   	 * List of typical german stopwords.
   	 */
  -	private String[] BRAZILIAN_STOP_WORDS = {
  +	public final static String[] BRAZILIAN_STOP_WORDS = {
         "a","ainda","alem","ambas","ambos","antes",
         "ao","aonde","aos","apos","aquele","aqueles",
         "as","assim","com","como","contra","contudo",
  @@ -112,7 +112,7 @@
   	private Set excltable = new HashSet();
   
   	/**
  -	 * Builds an analyzer.
  +	 * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
   	 */
   	public BrazilianAnalyzer() {
   		stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
  
  
  
  1.4       +2 -2      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
  
  Index: DutchAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- DutchAnalyzer.java	12 Mar 2004 15:52:59 -0000	1.3
  +++ DutchAnalyzer.java	17 Oct 2004 11:41:41 -0000	1.4
  @@ -45,7 +45,7 @@
     /**
      * List of typical Dutch stopwords.
      */
  -  private String[] DUTCH_STOP_WORDS =
  +  public final static String[] DUTCH_STOP_WORDS =
         {
           "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
           "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
  @@ -74,7 +74,7 @@
   
   
     /**
  -   * Builds an analyzer.
  +   * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}).
      */
     public DutchAnalyzer() {
       stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
  
  
  
  1.7       +3 -3      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
  
  Index: CzechAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- CzechAnalyzer.java	16 Aug 2004 20:42:59 -0000	1.6
  +++ CzechAnalyzer.java	17 Oct 2004 11:41:41 -0000	1.7
  @@ -80,7 +80,7 @@
   	/**
   	 * List of typical stopwords.
   	 */
  -	private static String[] STOP_WORDS = {
  +	public final static String[] CZECH_STOP_WORDS = {
           "a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
           "byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
           "jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
  @@ -107,10 +107,10 @@
   	private Set stoptable;
   
   	/**
  -	 * Builds an analyzer.
  +	 * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
   	 */
   	public CzechAnalyzer() {
  -		stoptable = StopFilter.makeStopSet( STOP_WORDS );
  +		stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
   	}
   
   	/**
  
  
  
  1.5       +4 -4      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
  
  Index: CJKAnalyzer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- CJKAnalyzer.java	12 Mar 2004 15:52:58 -0000	1.4
  +++ CJKAnalyzer.java	17 Oct 2004 11:41:41 -0000	1.5
  @@ -76,7 +76,7 @@
      * An array containing some common English words that are not usually
      * useful for searching. and some double-byte interpunctions.....
      */
  -  private static String[] stopWords = {
  +  public final static String[] STOP_WORDS = {
       "a", "and", "are", "as", "at", "be",
       "but", "by", "for", "if", "in",
       "into", "is", "it", "no", "not",
  @@ -97,10 +97,10 @@
     //~ Constructors -----------------------------------------------------------
   
     /**
  -   * Builds an analyzer which removes words in STOP_WORDS.
  +   * Builds an analyzer which removes words in {@link #STOP_WORDS}.
      */
     public CJKAnalyzer() {
  -    stopTable = StopFilter.makeStopSet(stopWords);
  +    stopTable = StopFilter.makeStopSet(STOP_WORDS);
     }
   
     /**
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org