You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by dn...@apache.org on 2004/10/17 13:41:41 UTC
cvs commit: jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk CJKAnalyzer.java
dnaber 2004/10/17 04:41:41
Modified: contributions/analyzers/src/java/org/apache/lucene/analysis/fr
FrenchAnalyzer.java
contributions/analyzers/src/java/org/apache/lucene/analysis/br
BrazilianAnalyzer.java
contributions/analyzers/src/java/org/apache/lucene/analysis/nl
DutchAnalyzer.java
contributions/analyzers/src/java/org/apache/lucene/analysis/cz
CzechAnalyzer.java
contributions/analyzers/src/java/org/apache/lucene/analysis/cjk
CJKAnalyzer.java
Log:
improve doc for default constructor; make stop word list public but final
Revision Changes Path
1.9 +3 -3 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
Index: FrenchAnalyzer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- FrenchAnalyzer.java 17 Aug 2004 21:34:47 -0000 1.8
+++ FrenchAnalyzer.java 17 Oct 2004 11:41:40 -0000 1.9
@@ -84,7 +84,7 @@
/**
* Extended list of typical french stopwords.
*/
- private String[] FRENCH_STOP_WORDS = {
+ public final static String[] FRENCH_STOP_WORDS = {
"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
"autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
"c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
@@ -119,7 +119,7 @@
private Set excltable = new HashSet();
/**
- * Builds an analyzer.
+ * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
*/
public FrenchAnalyzer() {
stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
1.9 +3 -3 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
Index: BrazilianAnalyzer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- BrazilianAnalyzer.java 16 Aug 2004 20:42:59 -0000 1.8
+++ BrazilianAnalyzer.java 17 Oct 2004 11:41:40 -0000 1.9
@@ -73,14 +73,14 @@
* will not be indexed at all) and an external list of exclusions (word that will
* not be stemmed, but indexed).
*
- * @author João Kramer
+ * @author João Kramer
*/
public final class BrazilianAnalyzer extends Analyzer {
/**
* List of typical german stopwords.
*/
- private String[] BRAZILIAN_STOP_WORDS = {
+ public final static String[] BRAZILIAN_STOP_WORDS = {
"a","ainda","alem","ambas","ambos","antes",
"ao","aonde","aos","apos","aquele","aqueles",
"as","assim","com","como","contra","contudo",
@@ -112,7 +112,7 @@
private Set excltable = new HashSet();
/**
- * Builds an analyzer.
+ * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
*/
public BrazilianAnalyzer() {
stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
1.4 +2 -2 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
Index: DutchAnalyzer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- DutchAnalyzer.java 12 Mar 2004 15:52:59 -0000 1.3
+++ DutchAnalyzer.java 17 Oct 2004 11:41:41 -0000 1.4
@@ -45,7 +45,7 @@
/**
* List of typical Dutch stopwords.
*/
- private String[] DUTCH_STOP_WORDS =
+ public final static String[] DUTCH_STOP_WORDS =
{
"de", "en", "van", "ik", "te", "dat", "die", "in", "een",
"hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
@@ -74,7 +74,7 @@
/**
- * Builds an analyzer.
+ * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}).
*/
public DutchAnalyzer() {
stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
1.7 +3 -3 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
Index: CzechAnalyzer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- CzechAnalyzer.java 16 Aug 2004 20:42:59 -0000 1.6
+++ CzechAnalyzer.java 17 Oct 2004 11:41:41 -0000 1.7
@@ -80,7 +80,7 @@
/**
* List of typical stopwords.
*/
- private static String[] STOP_WORDS = {
+ public final static String[] CZECH_STOP_WORDS = {
"a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
"byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
"jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
@@ -107,10 +107,10 @@
private Set stoptable;
/**
- * Builds an analyzer.
+ * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
*/
public CzechAnalyzer() {
- stoptable = StopFilter.makeStopSet( STOP_WORDS );
+ stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
}
/**
1.5 +4 -4 jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
Index: CJKAnalyzer.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- CJKAnalyzer.java 12 Mar 2004 15:52:58 -0000 1.4
+++ CJKAnalyzer.java 17 Oct 2004 11:41:41 -0000 1.5
@@ -76,7 +76,7 @@
* An array containing some common English words that are not usually
* useful for searching. and some double-byte interpunctions.....
*/
- private static String[] stopWords = {
+ public final static String[] STOP_WORDS = {
"a", "and", "are", "as", "at", "be",
"but", "by", "for", "if", "in",
"into", "is", "it", "no", "not",
@@ -97,10 +97,10 @@
//~ Constructors -----------------------------------------------------------
/**
- * Builds an analyzer which removes words in STOP_WORDS.
+ * Builds an analyzer which removes words in {@link #STOP_WORDS}.
*/
public CJKAnalyzer() {
- stopTable = StopFilter.makeStopSet(stopWords);
+ stopTable = StopFilter.makeStopSet(STOP_WORDS);
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org