You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by dn...@apache.org on 2004/08/18 16:30:48 UTC

cvs commit: jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br BrazilianStemFilter.java BrazilianStemmer.java

dnaber      2004/08/18 07:30:48

  Modified:    contributions/analyzers/src/java/org/apache/lucene/analysis/br
                        BrazilianStemFilter.java BrazilianStemmer.java
  Log:
  convert to utf-8
  
  Revision  Changes    Path
  1.7       +1 -1      jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
  
  Index: BrazilianStemFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- BrazilianStemFilter.java	12 Mar 2004 15:52:58 -0000	1.6
  +++ BrazilianStemFilter.java	18 Aug 2004 14:30:47 -0000	1.7
  @@ -66,7 +66,7 @@
   /**
    * Based on (copied) the GermanStemFilter
    *
  - * @author João Kramer
  + * @author João Kramer
    *         <p/>
    *         <p/>
    *         A filter that stemms german words. It supports a table of words that should
  
  
  
  1.4       +20 -20    jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
  
  Index: BrazilianStemmer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- BrazilianStemmer.java	22 Jan 2004 20:54:46 -0000	1.3
  +++ BrazilianStemmer.java	18 Aug 2004 14:30:48 -0000	1.4
  @@ -56,7 +56,7 @@
   
   /**
    * A stemmer for brazilian words. The algorithm is based on the report
  - * "A Fast and Simple Stemming Algorithm for German Words" by Jörg
  + * "A Fast and Simple Stemming Algorithm for German Words" by Jörg
    * Caumanns (joerg.caumanns@isst.fhg.de).
    *
    * @author    Gerhard Schwarz
  @@ -282,8 +282,8 @@
   	/**
      * 1) Turn to lowercase
      * 2) Remove accents
  -   * 3) ã -> a ; õ -> o
  -   * 4) ç -> c
  +   * 3) ã -> a ; õ -> o
  +   * 4) ç -> c
      *
      * @return null or a string transformed
   	 */
  @@ -299,31 +299,31 @@
   
       value = value.toLowerCase() ;
       for (j=0 ; j < value.length() ; j++) {
  -      if ((value.charAt(j) == 'á') ||
  -          (value.charAt(j) == 'â') ||
  -          (value.charAt(j) == 'ã')) {
  +      if ((value.charAt(j) == 'á') ||
  +          (value.charAt(j) == 'â') ||
  +          (value.charAt(j) == 'ã')) {
           r= r + "a" ; continue ;
         }
  -      if ((value.charAt(j) == 'é') ||
  -          (value.charAt(j) == 'ê')) {
  +      if ((value.charAt(j) == 'é') ||
  +          (value.charAt(j) == 'ê')) {
           r= r + "e" ; continue ;
         }
  -      if (value.charAt(j) == 'í') {
  +      if (value.charAt(j) == 'í') {
           r= r + "i" ; continue ;
         }
  -      if ((value.charAt(j) == 'ó') ||
  -          (value.charAt(j) == 'ô') ||
  -          (value.charAt(j) == 'õ')) {
  +      if ((value.charAt(j) == 'ó') ||
  +          (value.charAt(j) == 'ô') ||
  +          (value.charAt(j) == 'õ')) {
           r= r + "o" ; continue ;
         }
  -      if ((value.charAt(j) == 'ú') ||
  -          (value.charAt(j) == 'ü')) {
  +      if ((value.charAt(j) == 'ú') ||
  +          (value.charAt(j) == 'ü')) {
           r= r + "u" ; continue ;
         }
  -      if (value.charAt(j) == 'ç') {
  +      if (value.charAt(j) == 'ç') {
           r= r + "c" ; continue ;
         }
  -      if (value.charAt(j) == 'ñ') {
  +      if (value.charAt(j) == 'ñ') {
           r= r + "n" ; continue ;
         }
   
  @@ -410,7 +410,7 @@
     }
   
   	/**
  -	 * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'.
  +	 * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'.
   	 */
   	private void createCT( String term ) {
       CT = changeTerm(term) ;
  @@ -1008,7 +1008,7 @@
   	/**
   	 * Residual suffix
      *
  -   * If the word ends with one of the suffixes (os a i o á í ó)
  +   * If the word ends with one of the suffixes (os a i o á í ó)
      * in RV, delete it
      *
   	*/
  @@ -1031,11 +1031,11 @@
     }
   
   	/**
  -	 * If the word ends with one of ( e é ê) in RV,delete it,
  +	 * If the word ends with one of ( e é ê) in RV,delete it,
      * and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
      * delete the 'u' (or 'i')
      *
  -   * Or if the word ends ç remove the cedilha
  +   * Or if the word ends ç remove the cedilha
      *
   	*/
   	private void step5() {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org