You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by dn...@apache.org on 2004/08/18 16:30:48 UTC
cvs commit: jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br BrazilianStemFilter.java BrazilianStemmer.java
dnaber 2004/08/18 07:30:48
Modified: contributions/analyzers/src/java/org/apache/lucene/analysis/br
BrazilianStemFilter.java BrazilianStemmer.java
Log:
convert to utf-8
Revision Changes Path
1.7 +1 -1 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
Index: BrazilianStemFilter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- BrazilianStemFilter.java 12 Mar 2004 15:52:58 -0000 1.6
+++ BrazilianStemFilter.java 18 Aug 2004 14:30:47 -0000 1.7
@@ -66,7 +66,7 @@
/**
* Based on (copied) the GermanStemFilter
*
- * @author João Kramer
+ * @author João Kramer
* <p/>
* <p/>
* A filter that stemms german words. It supports a table of words that should
1.4 +20 -20 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
Index: BrazilianStemmer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- BrazilianStemmer.java 22 Jan 2004 20:54:46 -0000 1.3
+++ BrazilianStemmer.java 18 Aug 2004 14:30:48 -0000 1.4
@@ -56,7 +56,7 @@
/**
* A stemmer for brazilian words. The algorithm is based on the report
- * "A Fast and Simple Stemming Algorithm for German Words" by Jörg
+ * "A Fast and Simple Stemming Algorithm for German Words" by Jörg
* Caumanns (joerg.caumanns@isst.fhg.de).
*
* @author Gerhard Schwarz
@@ -282,8 +282,8 @@
/**
* 1) Turn to lowercase
* 2) Remove accents
- * 3) ã -> a ; õ -> o
- * 4) ç -> c
+ * 3) ã -> a ; õ -> o
+ * 4) ç -> c
*
* @return null or a string transformed
*/
@@ -299,31 +299,31 @@
value = value.toLowerCase() ;
for (j=0 ; j < value.length() ; j++) {
- if ((value.charAt(j) == '�') ||
- (value.charAt(j) == '�') ||
- (value.charAt(j) == '�')) {
+ if ((value.charAt(j) == 'á') ||
+ (value.charAt(j) == 'â') ||
+ (value.charAt(j) == 'ã')) {
r= r + "a" ; continue ;
}
- if ((value.charAt(j) == '�') ||
- (value.charAt(j) == '�')) {
+ if ((value.charAt(j) == 'é') ||
+ (value.charAt(j) == 'ê')) {
r= r + "e" ; continue ;
}
- if (value.charAt(j) == '�') {
+ if (value.charAt(j) == 'í') {
r= r + "i" ; continue ;
}
- if ((value.charAt(j) == '�') ||
- (value.charAt(j) == '�') ||
- (value.charAt(j) == '�')) {
+ if ((value.charAt(j) == 'ó') ||
+ (value.charAt(j) == 'ô') ||
+ (value.charAt(j) == 'õ')) {
r= r + "o" ; continue ;
}
- if ((value.charAt(j) == '�') ||
- (value.charAt(j) == '�')) {
+ if ((value.charAt(j) == 'ú') ||
+ (value.charAt(j) == 'ü')) {
r= r + "u" ; continue ;
}
- if (value.charAt(j) == '�') {
+ if (value.charAt(j) == 'ç') {
r= r + "c" ; continue ;
}
- if (value.charAt(j) == '�') {
+ if (value.charAt(j) == 'ñ') {
r= r + "n" ; continue ;
}
@@ -410,7 +410,7 @@
}
/**
- * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'.
+ * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'.
*/
private void createCT( String term ) {
CT = changeTerm(term) ;
@@ -1008,7 +1008,7 @@
/**
* Residual suffix
*
- * If the word ends with one of the suffixes (os a i o á í ó)
+ * If the word ends with one of the suffixes (os a i o á í ó)
* in RV, delete it
*
*/
@@ -1031,11 +1031,11 @@
}
/**
- * If the word ends with one of ( e é ê) in RV,delete it,
+ * If the word ends with one of ( e é ê) in RV,delete it,
* and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
* delete the 'u' (or 'i')
*
- * Or if the word ends ç remove the cedilha
+ * Or if the word ends ç remove the cedilha
*
*/
private void step5() {
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org