Posted to java-commits@lucene.apache.org by rm...@apache.org on 2009/08/18 14:55:28 UTC
svn commit: r805400 [1/2] - in /lucene/java/trunk/contrib:
analyzers/common/src/java/org/apache/lucene/analysis/ar/
analyzers/common/src/java/org/apache/lucene/analysis/br/
analyzers/common/src/java/org/apache/lucene/analysis/cjk/
analyzers/common/src/...
Author: rmuir
Date: Tue Aug 18 12:55:26 2009
New Revision: 805400
URL: http://svn.apache.org/viewvc?rev=805400&view=rev
Log:
LUCENE-1692: Additional tests and javadocs for contrib/analyzers
Added:
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/package.html (with props)
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/package.html (with props)
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/package.html (with props)
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/package.html (with props)
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/package.html (with props)
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/package.html (with props)
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/package.html (with props)
Modified:
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/package.html
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/package.html
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/package.html
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekCharsets.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/TestSmartChineseAnalyzer.java
lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/TestSynonymTokenFilter.java
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -34,7 +34,7 @@
import org.apache.lucene.analysis.WordlistLoader;
/**
- * Analyzer for Arabic.
+ * {@link Analyzer} for Arabic.
* <p>
* This analyzer implements light-stemming as specified by:
* <i>
@@ -108,10 +108,11 @@
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
- * @return A TokenStream built from an ArabicTokenizer filtered with
- * StopFilter, LowerCaseFilter, ArabicNormalizationFilter and ArabicStemFilter.
+ * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
+ * {@link StopFilter}, {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}
+ * and {@link ArabicStemFilter}.
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new ArabicLetterTokenizer( reader );
@@ -129,12 +130,12 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
+ * in the provided {@link Reader}.
*
- * @return A TokenStream built from an ArabicTokenizer filtered with
- * StopFilter, LowerCaseFilter, ArabicNormalizationFilter and
- * ArabicStemFilter.
+ * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
+ * {@link StopFilter}, {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}
+ * and {@link ArabicStemFilter}.
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
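
For reference, a minimal sketch of driving the analyzer chain documented above through the attribute-based API used elsewhere in this patch. The class name, the field name "body", and the sample text are illustrative only, not part of the commit:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class ArabicAnalyzerExample {
  public static void main(String[] args) throws IOException {
    // The default constructor loads the bundled Arabic stopword list.
    ArabicAnalyzer analyzer = new ArabicAnalyzer();
    // tokenStream() builds the chain described in the javadoc above:
    // ArabicLetterTokenizer filtered with StopFilter, LowerCaseFilter,
    // ArabicNormalizationFilter and ArabicStemFilter.
    TokenStream ts = analyzer.tokenStream("body", new StringReader("sample text"));
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
  }
}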
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java Tue Aug 18 12:55:26 2009
@@ -24,7 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * A TokenFilter that applies {@link ArabicNormalizer} to normalize the orthography.
+ * A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography.
*
*/
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java Tue Aug 18 12:55:26 2009
@@ -24,7 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * A TokenFilter that applies {@link ArabicStemmer} to stem Arabic words..
+ * A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.
*
*/
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -34,15 +34,17 @@
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
- * Analyzer for Brazilian language. Supports an external list of stopwords (words that
- * will not be indexed at all) and an external list of exclusions (word that will
+ * {@link Analyzer} for Brazilian Portuguese language.
+ * <p>
+ * Supports an external list of stopwords (words that
+ * will not be indexed at all) and an external list of exclusions (words that will
* not be stemmed, but indexed).
- *
+ * </p>
*/
public final class BrazilianAnalyzer extends Analyzer {
/**
- * List of typical Brazilian stopwords.
+ * List of typical Brazilian Portuguese stopwords.
*/
public final static String[] BRAZILIAN_STOP_WORDS = {
"a","ainda","alem","ambas","ambos","antes",
@@ -67,7 +69,7 @@
/**
- * Contains the stopwords used with the StopFilter.
+ * Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable = new HashSet();
@@ -111,7 +113,7 @@
excltable = StopFilter.makeStopSet( exclusionlist );
}
/**
- * Builds an exclusionlist from a Hashtable.
+ * Builds an exclusionlist from a {@link Map}.
*/
public void setStemExclusionTable( Map exclusionlist ) {
excltable = new HashSet(exclusionlist.keySet());
@@ -124,11 +126,11 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * LowerCaseFilter, StandardFilter, StopFilter, and
- * BrazilianStemFilter.
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
+ * {@link BrazilianStemFilter}.
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer( reader );
@@ -145,12 +147,12 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
+ * in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * LowerCaseFilter, StandardFilter, StopFilter, and
- * BrazilianStemFilter.
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
+ * {@link BrazilianStemFilter}.
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java Tue Aug 18 12:55:26 2009
@@ -25,13 +25,13 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * Based on GermanStemFilter
+ * A {@link TokenFilter} that applies {@link BrazilianStemmer}.
*
*/
public final class BrazilianStemFilter extends TokenFilter {
/**
- * The actual token in the input stream.
+ * {@link BrazilianStemmer} in use by this filter.
*/
private BrazilianStemmer stemmer = null;
private Set exclusions = null;
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java Tue Aug 18 12:55:26 2009
@@ -18,7 +18,7 @@
*/
/**
- * A stemmer for Brazilian words.
+ * A stemmer for Brazilian Portuguese words.
*/
public class BrazilianStemmer {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/package.html?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/package.html (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/package.html Tue Aug 18 12:55:26 2009
@@ -1,5 +1,5 @@
<html><head></head>
<body>
-Analyzer for Brazilian.
+Analyzer for Brazilian Portuguese.
</body>
</html>
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -28,7 +28,8 @@
/**
- * Filters CJKTokenizer with StopFilter.
+ * An {@link Analyzer} that tokenizes text with {@link CJKTokenizer} and
+ * filters with {@link StopFilter}
*
*/
public class CJKAnalyzer extends Analyzer {
@@ -77,11 +78,12 @@
//~ Methods ----------------------------------------------------------------
/**
- * get token stream from input
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
* @param fieldName lucene field name
- * @param reader input reader
- * @return TokenStream
+ * @param reader input {@link Reader}
+ * @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with
+ * {@link StopFilter}
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
return new StopFilter(new CJKTokenizer(reader), stopTable);
@@ -93,11 +95,13 @@
};
/**
- * get (possibly reused) token stream from input
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
+ * in the provided {@link Reader}.
*
* @param fieldName lucene field name
- * @param reader input reader
- * @return TokenStream
+ * @param reader Input {@link Reader}
+ * @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with
+ * {@link StopFilter}
*/
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
/* tokenStream() is final, no back compat issue */
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Tue Aug 18 12:55:26 2009
@@ -27,13 +27,20 @@
/**
- * CJKTokenizer was modified from StopTokenizer which does a decent job for
- * most European languages. It performs other token methods for double-byte
- * Characters: the token will return at each two characters with overlap match.<br>
- * Example: "java C1C2C3C4" will be segment to: "java" "C1C2" "C2C3" "C3C4" it
- * also need filter filter zero length token ""<br>
- * for Digit: digit, '+', '#' will token as letter<br>
- * for more info on Asia language(Chinese Japanese Korean) text segmentation:
+ * CJKTokenizer is designed for Chinese, Japanese, and Korean languages.
+ * <p>
+ * The tokens returned are every two adjacent characters with overlap match.
+ * </p>
+ * <p>
+ * Example: "java C1C2C3C4" will be segmented to: "java" "C1C2" "C2C3" "C3C4".
+ * </p>
+ * Additionally, the following is applied to Latin text (such as English):
+ * <ul>
+ * <li>Text is converted to lowercase.
+ * <li>Numeric digits, '+', '#', and '_' are tokenized as letters.
+ * <li>Full-width forms are converted to half-width forms.
+ * </ul>
+ * For more info on Asian language (Chinese, Japanese, and Korean) text segmentation:
* please search <a
* href="http://www.google.com/search?q=word+chinese+segment">google</a>
*
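
To make the segmentation above concrete, a small sketch feeding mixed Latin/CJK text through the tokenizer; the sample string (with 北京大学 standing in for C1C2C3C4) and the expected terms noted in the comments are illustrative:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class CJKTokenizerSketch {
  public static void main(String[] args) throws IOException {
    // "JAVA5" should come out as the single lowercased token "java5"
    // (digits are treated as letters); the four CJK characters should be
    // emitted as overlapping bigrams: 北京, 京大, 大学.
    TokenStream ts = new CJKTokenizer(new StringReader("JAVA5 北京大学"));
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
  }
}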
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -24,13 +24,8 @@
import org.apache.lucene.analysis.Tokenizer;
/**
- * Title: ChineseAnalyzer
- * Description:
- * Subclass of org.apache.lucene.analysis.Analyzer
- * build from a ChineseTokenizer, filtered with ChineseFilter.
- * Copyright: Copyright (c) 2001
- * Company:
- * @version 1.0
+ * An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and
+ * filters with {@link ChineseFilter}
*
*/
@@ -40,9 +35,10 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
- * @return A TokenStream build from a ChineseTokenizer filtered with ChineseFilter.
+ * @return A {@link TokenStream} built from a {@link ChineseTokenizer}
+ * filtered with {@link ChineseFilter}.
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new ChineseTokenizer(reader);
@@ -56,11 +52,11 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text in the
- * provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the
+ * provided {@link Reader}.
*
- * @return A TokenStream build from a ChineseTokenizer filtered with
- * ChineseFilter.
+ * @return A {@link TokenStream} built from a {@link ChineseTokenizer}
+ * filtered with {@link ChineseFilter}.
*/
public final TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java Tue Aug 18 12:55:26 2009
@@ -26,18 +26,19 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * Title: ChineseFilter
- * Description: Filter with a stop word table
- * Rule: No digital is allowed.
- * English word/token should larger than 1 character.
- * One Chinese character as one Chinese word.
+ * A {@link TokenFilter} with a stop word table.
+ * <ul>
+ * <li>Numeric tokens are removed.
+ * <li>English tokens must be larger than 1 character.
+ * <li>One Chinese character as one Chinese word.
+ * </ul>
* TO DO:
- * 1. Add Chinese stop words, such as \ue400
- * 2. Dictionary based Chinese word extraction
- * 3. Intelligent Chinese word extraction
- *
- * Copyright: Copyright (c) 2001
- * Company:
+ * <ol>
+ * <li>Add Chinese stop words, such as \ue400
+ * <li>Dictionary based Chinese word extraction
+ * <li>Intelligent Chinese word extraction
+ * </ol>
+ *
* @version 1.0
*
*/
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Tue Aug 18 12:55:26 2009
@@ -27,28 +27,29 @@
/**
- * Title: ChineseTokenizer
- * Description: Extract tokens from the Stream using Character.getType()
- * Rule: A Chinese character as a single token
- * Copyright: Copyright (c) 2001
- * Company:
- *
- * The difference between thr ChineseTokenizer and the
- * CJKTokenizer (id=23545) is that they have different
- * token parsing logic.
+ * Tokenize Chinese text as individual chinese characters.
*
- * Let me use an example. If having a Chinese text
- * "C1C2C3C4" to be indexed, the tokens returned from the
- * ChineseTokenizer are C1, C2, C3, C4. And the tokens
- * returned from the CJKTokenizer are C1C2, C2C3, C3C4.
- *
- * Therefore the index the CJKTokenizer created is much
- * larger.
- *
+ * <p>
+ * The difference between ChineseTokenizer and
+ * CJKTokenizer is that they have different
+ * token parsing logic.
+ * </p>
+ * <p>
+ * For example, if the Chinese text
+ * "C1C2C3C4" is to be indexed:
+ * <ul>
+ * <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4.
+ * <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.
+ * </ul>
+ * </p>
+ * <p>
+ * Therefore the index created by CJKTokenizer is much larger.
+ * </p>
+ * <p>
* The problem is that when searching for C1, C1C2, C1C3,
* C4C2, C1C2C3 ... the ChineseTokenizer works, but the
* CJKTokenizer will not work.
- *
+ * </p>
* @version 1.0
*
*/
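
The unigram/bigram contrast described above can be seen by running both tokenizers over the same text; 北京大学 again stands in for "C1C2C3C4", and the expected terms in the comments are illustrative:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKTokenizer;
import org.apache.lucene.analysis.cn.ChineseTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class UnigramVersusBigramSketch {
  public static void main(String[] args) throws IOException {
    String text = "北京大学";
    print(new ChineseTokenizer(new StringReader(text))); // 北 京 大 学
    print(new CJKTokenizer(new StringReader(text)));     // 北京 京大 大学
  }

  private static void print(TokenStream ts) throws IOException {
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.print(term.term() + " ");
    }
    System.out.println();
  }
}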
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/package.html?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/package.html (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/package.html Tue Aug 18 12:55:26 2009
@@ -3,7 +3,7 @@
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
-Analyzer for Chinese, which indexes unigrams (individuals chinese characters).
+Analyzer for Chinese, which indexes unigrams (individual chinese characters).
<p>
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
<ul>
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Tue Aug 18 12:55:26 2009
@@ -118,10 +118,11 @@
* Create a set of words from an array
* The resulting Set does case insensitive matching
* TODO We should look for a faster dictionary lookup approach.
- * @param dictionary
- * @return
+ * @param dictionary
+ * @return {@link Set} of lowercased terms
*/
public static final Set makeDictionary(final String[] dictionary) {
+ // is the below really case insensitive?
CharArraySet dict = new CharArraySet(dictionary.length, false);
addAllLowerCase(dict, Arrays.asList(dictionary));
return dict;
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java Tue Aug 18 12:55:26 2009
@@ -21,18 +21,21 @@
import java.util.Set;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter; // for javadocs
import org.apache.lucene.analysis.TokenStream;
/**
- * A TokenFilter that decomposes compound words found in many germanic languages
+ * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
+ * <p>
* "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
* "Donaudampfschiff" even when you only enter "schiff".
* It uses a brute-force algorithm to achieve this.
+ * </p>
*/
public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param dictionary the word dictionary to match against
* @param minWordSize only words longer than this get processed
* @param minSubwordSize only subwords longer than this get to the output stream
@@ -46,7 +49,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param dictionary the word dictionary to match against
*/
public DictionaryCompoundWordTokenFilter(TokenStream input, String[] dictionary) {
@@ -55,7 +58,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
* lower case strings.
*/
@@ -65,7 +68,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
* lower case strings.
* @param minWordSize only words longer than this get processed
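
A small sketch of the decomposition described in the class javadoc, using the String[] dictionary constructor touched in this hunk; the dictionary entries, the WhitespaceTokenizer front end, and the expected behaviour noted in the comment are illustrative assumptions:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class CompoundDecompositionSketch {
  public static void main(String[] args) throws IOException {
    String[] dictionary = { "donau", "dampf", "schiff" };
    // With the default minimum word/subword sizes, "Donaudampfschiff" should be
    // kept as a token and additionally decomposed into the dictionary parts,
    // so a search for "schiff" can find the compound.
    TokenStream ts = new DictionaryCompoundWordTokenFilter(
        new WhitespaceTokenizer(new StringReader("Donaudampfschiff")), dictionary);
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
  }
}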
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Tue Aug 18 12:55:26 2009
@@ -24,16 +24,19 @@
import java.util.Set;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter; // for javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.xml.sax.InputSource;
/**
- * A TokenFilter that decomposes compound words found in many germanic languages
+ * A {@link TokenFilter} that decomposes compound words found in many Germanic languages.
+ * <p>
* "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
- * "Donaudampfschiff" even when you only enter "schiff" It uses a hyphenation
+ * "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
* grammar and a word dictionary to achieve this.
+ * </p>
*/
public class HyphenationCompoundWordTokenFilter extends
CompoundWordTokenFilterBase {
@@ -41,7 +44,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param hyphenator the hyphenation pattern tree to use for hyphenation
* @param dictionary the word dictionary to match against
* @param minWordSize only words longer than this get processed
@@ -60,7 +63,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param hyphenator the hyphenation pattern tree to use for hyphenation
* @param dictionary the word dictionary to match against
*/
@@ -72,7 +75,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param hyphenator the hyphenation pattern tree to use for hyphenation
* @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
* lower case strings.
@@ -85,7 +88,7 @@
/**
*
- * @param input the token stream to process
+ * @param input the {@link TokenStream} to process
* @param hyphenator the hyphenation pattern tree to use for hyphenation
* @param dictionary the word dictionary to match against. If this is a {@link org.apache.lucene.analysis.CharArraySet CharArraySet} it must have set ignoreCase=false and only contain
* lower case strings.
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java Tue Aug 18 12:55:26 2009
@@ -110,7 +110,7 @@
/**
* Read hyphenation patterns from an XML file.
*
- * @param filename the filename
+ * @param f the filename
* @throws HyphenationException In case the parsing fails
*/
public void loadPatterns(File f) throws HyphenationException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/package.html?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/package.html (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/package.html Tue Aug 18 12:55:26 2009
@@ -5,7 +5,7 @@
</head>
<body>
A filter that decomposes compound words you find in many Germanic
-languages to the word parts. This example shows what it does:
+languages into the word parts. This example shows what it does:
<table border="1">
<tr>
<th>Input token stream</th>
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -31,11 +31,12 @@
import java.util.Set;
/**
- * Analyzer for Czech language. Supports an external list of stopwords (words that
- * will not be indexed at all).
- * A default set of stopwords is used unless an alternative list is specified, the
- * exclusion list is empty by default.
- *
+ * {@link Analyzer} for Czech language.
+ * <p>
+ * Supports an external list of stopwords (words that
+ * will not be indexed at all).
+ * A default set of stopwords is used unless an alternative list is specified.
+ * </p>
*/
public final class CzechAnalyzer extends Analyzer {
@@ -64,7 +65,7 @@
};
/**
- * Contains the stopwords used with the StopFilter.
+ * Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable;
@@ -125,10 +126,10 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, LowerCaseFilter, and StopFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
*/
public final TokenStream tokenStream( String fieldName, Reader reader ) {
TokenStream result = new StandardTokenizer( reader );
@@ -144,11 +145,11 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text in
- * the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in
+ * the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, LowerCaseFilter, and StopFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -35,12 +35,14 @@
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
- * Analyzer for German language. Supports an external list of stopwords (words that
+ * {@link Analyzer} for German language.
+ * <p>
+ * Supports an external list of stopwords (words that
* will not be indexed at all) and an external list of exclusions (word that will
* not be stemmed, but indexed).
- * A default set of stopwords is used unless an alternative list is specified, the
+ * A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
- *
+ * </p>
*
* @version $Id$
*/
@@ -65,7 +67,7 @@
};
/**
- * Contains the stopwords used with the StopFilter.
+ * Contains the stopwords used with the {@link StopFilter}.
*/
private Set stopSet = new HashSet();
@@ -75,8 +77,8 @@
private Set exclusionSet = new HashSet();
/**
- * Builds an analyzer with the default stop words
- * (<code>GERMAN_STOP_WORDS</code>).
+ * Builds an analyzer with the default stop words:
+ * {@link #GERMAN_STOP_WORDS}.
*/
public GermanAnalyzer() {
stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
@@ -115,7 +117,7 @@
}
/**
- * Builds an exclusionlist from a Hashtable.
+ * Builds an exclusionlist from a {@link Map}
*/
public void setStemExclusionTable(Map exclusionlist) {
exclusionSet = new HashSet(exclusionlist.keySet());
@@ -129,10 +131,11 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, LowerCaseFilter, StopFilter, GermanStemFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, and
+ * {@link GermanStemFilter}
*/
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
@@ -149,11 +152,12 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
+ * in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, LowerCaseFilter, StopFilter, GermanStemFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, and
+ * {@link GermanStemFilter}
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
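
For illustration, a minimal sketch of the stopword/exclusion behaviour described in the class javadoc. It assumes the String[] overload of setStemExclusionTable (only the Map overload appears in this hunk); the sample words and field name are illustrative:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class GermanAnalyzerSketch {
  public static void main(String[] args) throws IOException {
    // Uses the default GERMAN_STOP_WORDS, so "und" should be dropped.
    GermanAnalyzer analyzer = new GermanAnalyzer();
    // Excluded words are indexed as-is instead of being stemmed.
    analyzer.setStemExclusionTable(new String[] { "häuser" });
    TokenStream ts = analyzer.tokenStream("body", new StringReader("Häuser und Gärten"));
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
  }
}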
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java Tue Aug 18 12:55:26 2009
@@ -25,10 +25,12 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * A filter that stems German words. It supports a table of words that should
+ * A {@link TokenFilter} that stems German words.
+ * <p>
+ * It supports a table of words that should
* not be stemmed at all. The stemmer used can be changed at runtime after the
- * filter object is created (as long as it is a GermanStemmer).
- *
+ * filter object is created (as long as it is a {@link GermanStemmer}).
+ * </p>
*
* @version $Id$
*/
@@ -78,7 +80,7 @@
}
/**
- * Set a alternative/custom GermanStemmer for this filter.
+ * Set a alternative/custom {@link GermanStemmer} for this filter.
*/
public void setStemmer( GermanStemmer stemmer )
{
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java Tue Aug 18 12:55:26 2009
@@ -19,10 +19,12 @@
*/
/**
- * A stemmer for German words. The algorithm is based on the report
+ * A stemmer for German words.
+ * <p>
+ * The algorithm is based on the report
* "A Fast and Simple Stemming Algorithm for German Words" by Jörg
* Caumanns (joerg.caumanns at isst.fhg.de).
- *
+ * </p>
*
* @version $Id$
*/
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -30,10 +30,12 @@
import java.util.Set;
/**
- * Analyzer for the Greek language. Supports an external list of stopwords (words
+ * {@link Analyzer} for the Greek language.
+ * <p>
+ * Supports an external list of stopwords (words
* that will not be indexed at all).
* A default set of stopwords is used unless an alternative list is specified.
- *
+ * </p>
*/
public final class GreekAnalyzer extends Analyzer
{
@@ -145,14 +147,14 @@
};
/**
- * Contains the stopwords used with the StopFilter.
+ * Contains the stopwords used with the {@link StopFilter}.
*/
private Set stopSet = new HashSet();
/**
* Charset for Greek letters.
* Represents encoding for 24 lowercase Greek letters.
- * Predefined charsets can be taken from GreekCharSets class
+ * Predefined charsets can be taken from {@link GreekCharsets} class
*/
private char[] charset;
@@ -209,10 +211,10 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * GreekLowerCaseFilter and StopFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link GreekLowerCaseFilter} and {@link StopFilter}
*/
public TokenStream tokenStream(String fieldName, Reader reader)
{
@@ -228,11 +230,11 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
+ * in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * GreekLowerCaseFilter and StopFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
+ * {@link GreekLowerCaseFilter} and {@link StopFilter}
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekCharsets.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekCharsets.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekCharsets.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekCharsets.java Tue Aug 18 12:55:26 2009
@@ -19,10 +19,11 @@
/**
* GreekCharsets class contains encodings schemes (charsets) and toLowerCase() method implementation
* for greek characters in Unicode, ISO-8859-7 and Microsoft Windows CP1253.
+ * <p>
* Each encoding scheme contains lowercase (positions 0-35) and uppercase (position 36-68) characters,
* including accented ones. One should be able to add other encoding schemes (see RFC 1947) by adding
* the definition of a new charset as well as the required logic in the toLowerCase() method.
- *
+ * </p>
*/
public class GreekCharsets
{
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -36,12 +36,12 @@
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
/**
- * Analyzer for Persian.
- *
- * Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around
- * ZWNJ in addition to space. Some persian-specific variant forms (such as farsi
+ * {@link Analyzer} for Persian.
+ * <p>
+ * This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around
+ * zero-width non-joiner in addition to whitespace. Some persian-specific variant forms (such as farsi
* yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
- *
+ * </p>
*/
public final class PersianAnalyzer extends Analyzer {
@@ -107,11 +107,13 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided
+ * {@link Reader}.
*
- * @return A TokenStream build from a ArabicLetterTokenizer filtered with
- * LowerCaseFilter, ArabicNormalizationFilter,
- * PersianNormalizationFilter and Persian Stop words
+ * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
+ * filtered with {@link LowerCaseFilter},
+ * {@link ArabicNormalizationFilter},
+ * {@link PersianNormalizationFilter} and Persian Stop words
*/
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new ArabicLetterTokenizer(reader);
@@ -134,12 +136,13 @@
}
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
+ * in the provided {@link Reader}.
*
- * @return A TokenStream build from a ArabicLetterTokenizer filtered with
- * LowerCaseFilter, ArabicNormalizationFilter,
- * PersianNormalizationFilter and Persian Stop words
+ * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
+ * filtered with {@link LowerCaseFilter},
+ * {@link ArabicNormalizationFilter},
+ * {@link PersianNormalizationFilter} and Persian Stop words
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java Tue Aug 18 12:55:26 2009
@@ -24,7 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * A TokenFilter that applies {@link PersianNormalizer} to normalize the
+ * A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the
* orthography.
*
*/
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Tue Aug 18 12:55:26 2009
@@ -22,16 +22,17 @@
import java.util.HashSet;
import java.util.Arrays;
import java.util.Iterator;
+import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * Removes elisions from a token stream. For example, "l'avion" (the plane) will be
+ * Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
* tokenized as "avion" (plane).
* <p>
- * Note that StandardTokenizer sees " ' " as a space, and cuts it out.
+ * Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out.
*
* @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
*/
@@ -78,7 +79,7 @@
}
/**
- * Returns the next input Token with term() without elisioned start
+ * Increments the {@link TokenStream} with a {@link TermAttribute} without elisioned start
*/
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
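
To illustrate the elision handling described above, a sketch that assumes the single-argument ElisionFilter constructor with its default French article set; a WhitespaceTokenizer is used (rather than StandardTokenizer) so that "l'avion" reaches the filter as one token:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class ElisionSketch {
  public static void main(String[] args) throws IOException {
    // "l'avion" should be emitted as "avion" once the elided article is removed.
    TokenStream ts = new ElisionFilter(
        new WhitespaceTokenizer(new StringReader("l'avion")));
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
  }
}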
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -34,12 +34,14 @@
import java.util.Set;
/**
- * Analyzer for French language. Supports an external list of stopwords (words that
+ * {@link Analyzer} for French language.
+ * <p>
+ * Supports an external list of stopwords (words that
* will not be indexed at all) and an external list of exclusions (word that will
* not be stemmed, but indexed).
- * A default set of stopwords is used unless an alternative list is specified, the
+ * A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
- *
+ * </p>
*
* @version $Id$
*/
@@ -74,7 +76,7 @@
};
/**
- * Contains the stopwords used with the StopFilter.
+ * Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable = new HashSet();
/**
@@ -127,10 +129,12 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the provided
+ * {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link StopFilter},
+ * {@link FrenchStemFilter} and {@link LowerCaseFilter}
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
@@ -152,11 +156,12 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
+ * text in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link StopFilter},
+ * {@link FrenchStemFilter} and {@link LowerCaseFilter}
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
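A small sketch of consuming the chain these javadocs describe (a fragment; imports omitted, and the exact stems produced are only indicative):

  Analyzer analyzer = new FrenchAnalyzer();
  TokenStream ts = analyzer.tokenStream("contents", new StringReader("les avions décollent"));
  TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
  while (ts.incrementToken()) {
    // stopwords such as "les" are dropped; remaining terms are lowercased and stemmed
    System.out.println(term.term());
  }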
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java Tue Aug 18 12:55:26 2009
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -28,10 +27,12 @@
import java.util.Set;
/**
- * A filter that stemms french words. It supports a table of words that should
+ * A {@link TokenFilter} that stems French words.
+ * <p>
+ * It supports a table of words that should
* not be stemmed at all. The used stemmer can be changed at runtime after the
- * filter object is created (as long as it is a FrenchStemmer).
- *
+ * filter object is created (as long as it is a {@link FrenchStemmer}).
+ * </p>
*/
public final class FrenchStemFilter extends TokenFilter {
@@ -75,7 +76,7 @@
}
}
/**
- * Set a alternative/custom FrenchStemmer for this filter.
+ * Set an alternative/custom {@link FrenchStemmer} for this filter.
*/
public void setStemmer( FrenchStemmer stemmer ) {
if ( stemmer != null ) {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java Tue Aug 18 12:55:26 2009
@@ -18,11 +18,13 @@
*/
/**
- * A stemmer for French words. The algorithm is based on the work of
+ * A stemmer for French words.
+ * <p>
+ * The algorithm is based on the work of
* Dr Martin Porter on his snowball project<br>
* refer to http://snowball.sourceforge.net/french/stemmer.html<br>
* (French stemming algorithm) for details
- *
+ * </p>
*/
public class FrenchStemmer {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java Tue Aug 18 12:55:26 2009
@@ -23,7 +23,7 @@
import java.io.IOException;
/**
- * Links two PrefixAwareTokenFilter
+ * Links two {@link PrefixAwareTokenFilter} instances.
* <p/>
* <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
* the ones located in org.apache.lucene.analysis.tokenattributes.
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java Tue Aug 18 12:55:26 2009
@@ -29,7 +29,7 @@
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
- * A token stream containing a single token.
+ * A {@link TokenStream} containing a single token.
*/
public class SingleTokenTokenStream extends TokenStream {
Added: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/package.html?rev=805400&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/package.html (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/package.html Tue Aug 18 12:55:26 2009
@@ -0,0 +1,5 @@
+<html><head></head>
+<body>
+Miscellaneous TokenStreams.
+</body>
+</html>
Propchange: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Tue Aug 18 12:55:26 2009
@@ -27,9 +27,9 @@
/**
* Tokenizes the given token into n-grams of given size(s).
- *
- * This filter create n-grams from the beginning edge or ending edge of a input token.
- *
+ * <p>
+ * This {@link TokenFilter} creates n-grams from the beginning edge or ending edge of an input token.
+ * </p>
*/
public class EdgeNGramTokenFilter extends TokenFilter {
public static final Side DEFAULT_SIDE = Side.FRONT;
@@ -84,7 +84,7 @@
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param input TokenStream holding the input to be tokenized
+ * @param input {@link TokenStream} holding the input to be tokenized
* @param side the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
@@ -114,7 +114,7 @@
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
- * @param input TokenStream holding the input to be tokenized
+ * @param input {@link TokenStream} holding the input to be tokenized
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
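The (input, side, minGram, maxGram) constructor documented above can be exercised roughly like this (a fragment; the expected output assumes the current front-edge behavior):

  TokenStream ts = new EdgeNGramTokenFilter(
      new WhitespaceTokenizer(new StringReader("lucene")),
      EdgeNGramTokenFilter.Side.FRONT, 1, 3);
  // expected front-edge n-grams for "lucene": "l", "lu", "luc"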
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Tue Aug 18 12:55:26 2009
@@ -19,7 +19,6 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -28,10 +27,10 @@
/**
* Tokenizes the input from an edge into n-grams of given size(s).
- *
- * This tokenizer create n-grams from the beginning edge or ending edge of a input token.
+ * <p>
+ * This {@link Tokenizer} creates n-grams from the beginning edge or ending edge of an input token.
* MaxGram can't be larger than 1024 because of limitation.
- *
+ * </p>
*/
public class EdgeNGramTokenizer extends Tokenizer {
public static final Side DEFAULT_SIDE = Side.FRONT;
@@ -82,7 +81,7 @@
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param input Reader holding the input to be tokenized
+ * @param input {@link Reader} holding the input to be tokenized
* @param side the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
@@ -112,7 +111,7 @@
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
- * @param input Reader holding the input to be tokenized
+ * @param input {@link Reader} holding the input to be tokenized
* @param sideLabel the name of the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
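The tokenizer variant looks much the same; this sketch uses the sideLabel form of the constructor documented above and assumes "front" is the label for Side.FRONT:

  Tokenizer edge = new EdgeNGramTokenizer(new StringReader("lucene"), "front", 2, 4);
  // expected: "lu", "luc", "luce"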
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Tue Aug 18 12:55:26 2009
@@ -44,7 +44,7 @@
/**
* Creates NGramTokenFilter with given min and max n-grams.
- * @param input TokenStream holding the input to be tokenized
+ * @param input {@link TokenStream} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
@@ -65,7 +65,7 @@
/**
* Creates NGramTokenFilter with default min and max n-grams.
- * @param input TokenStream holding the input to be tokenized
+ * @param input {@link TokenStream} holding the input to be tokenized
*/
public NGramTokenFilter(TokenStream input) {
this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Tue Aug 18 12:55:26 2009
@@ -44,7 +44,7 @@
/**
* Creates NGramTokenizer with given min and max n-grams.
- * @param input Reader holding the input to be tokenized
+ * @param input {@link Reader} holding the input to be tokenized
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
@@ -64,7 +64,7 @@
}
/**
* Creates NGramTokenizer with default min and max n-grams.
- * @param input Reader holding the input to be tokenized
+ * @param input {@link Reader} holding the input to be tokenized
*/
public NGramTokenizer(Reader input) {
this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
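A corresponding sketch for the plain n-gram tokenizer, using the (Reader, minGram, maxGram) constructor documented above (the emission order by gram size is an assumption; NGramTokenFilter is used the same way on a TokenStream):

  Tokenizer ngrams = new NGramTokenizer(new StringReader("abcd"), 2, 3);
  // expected grams: "ab", "bc", "cd", "abc", "bcd"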
Added: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/package.html?rev=805400&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/package.html (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/package.html Tue Aug 18 12:55:26 2009
@@ -0,0 +1,5 @@
+<html><head></head>
+<body>
+Character n-gram tokenizers and filters.
+</body>
+</html>
Propchange: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -33,13 +33,15 @@
import java.util.Map;
/**
- * Analyzer for Dutch language. Supports an external list of stopwords (words that
+ * {@link Analyzer} for the Dutch language.
+ * <p>
+ * Supports an external list of stopwords (words that
* will not be indexed at all), an external list of exclusions (word that will
* not be stemmed, but indexed) and an external list of word-stem pairs that overrule
* the algorithm (dictionary stemming).
- * A default set of stopwords is used unless an alternative list is specified, the
+ * A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
- *
+ * </p>
*/
public class DutchAnalyzer extends Analyzer {
/**
@@ -165,10 +167,12 @@
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided TextReader.
+ * Creates a {@link TokenStream} which tokenizes all the text in the
+ * provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with StandardFilter,
- * StopFilter, DutchStemFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link StopFilter},
+ * and {@link DutchStemFilter}
*/
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
@@ -184,11 +188,12 @@
};
/**
- * Returns a (possibly reused) TokenStream which tokenizes all the text
- * in the provided Reader.
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
+ * text in the provided {@link Reader}.
*
- * @return A TokenStream built from a StandardTokenizer filtered with
- * StandardFilter, StopFilter, DutchStemFilter
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link StopFilter},
+ * and {@link DutchStemFilter}
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java Tue Aug 18 12:55:26 2009
@@ -28,10 +28,12 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
- * A filter that stems Dutch words. It supports a table of words that should
+ * A {@link TokenFilter} that stems Dutch words.
+ * <p>
+ * It supports a table of words that should
* not be stemmed at all. The stemmer used can be changed at runtime after the
- * filter object is created (as long as it is a DutchStemmer).
- *
+ * filter object is created (as long as it is a {@link DutchStemmer}).
+ * </p>
*/
public final class DutchStemFilter extends TokenFilter {
/**
@@ -85,7 +87,7 @@
}
/**
- * Set a alternative/custom DutchStemmer for this filter.
+ * Set an alternative/custom {@link DutchStemmer} for this filter.
*/
public void setStemmer(DutchStemmer stemmer) {
if (stemmer != null) {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java Tue Aug 18 12:55:26 2009
@@ -20,11 +20,12 @@
import java.util.Map;
/**
- *
- * A stemmer for Dutch words. The algorithm is an implementation of
+ * A stemmer for Dutch words.
+ * <p>
+ * The algorithm is an implementation of
* the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
* algorithm in Martin Porter's snowball project.
- *
+ * </p>
*/
public class DutchStemmer {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java Tue Aug 18 12:55:26 2009
@@ -4,7 +4,7 @@
/**
- *
+ * Base class for payload encoders.
*
**/
public abstract class AbstractEncoder implements PayloadEncoder{
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java Tue Aug 18 12:55:26 2009
@@ -22,7 +22,7 @@
/**
* Encode a character array Float as a {@link org.apache.lucene.index.Payload}.
* <p/>
- * @see {@link org.apache.lucene.analysis.payloads.PayloadHelper#encodeFloat(float, byte[], int)}
+ * @see org.apache.lucene.analysis.payloads.PayloadHelper#encodeFloat(float, byte[], int)
*
**/
public class FloatEncoder extends AbstractEncoder implements PayloadEncoder {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java Tue Aug 18 12:55:26 2009
@@ -20,7 +20,7 @@
/**
- * Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload
+ * Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload.
* <p/>
* NOTE: This interface is subject to change
*
@@ -34,7 +34,7 @@
* @param buffer
* @param offset
* @param length
- * @return
+ * @return encoded {@link Payload}
*/
Payload encode(char [] buffer, int offset, int length);
}
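As a sketch of where a PayloadEncoder plugs in, assuming DelimitedPayloadTokenFilter offers a (TokenStream, char, PayloadEncoder) constructor (a fragment; imports omitted):

  TokenStream ts = new DelimitedPayloadTokenFilter(
      new WhitespaceTokenizer(new StringReader("soccer|1.0 goalkeeper|0.5")),
      '|', new FloatEncoder());
  // each token's text is split at '|' and the trailing float becomes its Payload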
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java Tue Aug 18 12:55:26 2009
@@ -18,7 +18,7 @@
/**
- *
+ * Utility methods for encoding payloads.
*
**/
public class PayloadHelper {
@@ -60,7 +60,7 @@
* @param offset The offset into the array.
* @return The float that was encoded
*
- * @see # encodeFloat (float)
+ * @see #encodeFloat(float)
*/
public static final float decodeFloat(byte [] bytes, int offset){
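The encode/decode pair referenced by this @see tag round-trips as in this small sketch:

  byte[] bytes = PayloadHelper.encodeFloat(0.75f);
  float value  = PayloadHelper.decodeFloat(bytes, 0);   // 0.75f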
Added: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/package.html?rev=805400&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/package.html (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/package.html Tue Aug 18 12:55:26 2009
@@ -0,0 +1,5 @@
+<html><head></head>
+<body>
+Filter for assigning position increments.
+</body>
+</html>
Propchange: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java?rev=805400&r1=805399&r2=805400&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java Tue Aug 18 12:55:26 2009
@@ -28,19 +28,19 @@
import java.io.Reader;
import java.util.*;
-/*
- * An analyzer used primarily at query time to wrap another analyzer and provide a layer of protection
- * which prevents very common words from being passed into queries. For very large indexes the cost
+/**
+ * An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection
+ * which prevents very common words from being passed into queries.
+ * <p>
+ * For very large indexes the cost
* of reading TermDocs for a very common word can be high. This analyzer was created after experience with
* a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for
* this term to take 2 seconds.
- *
+ * </p>
+ * <p>
* Use the various "addStopWords" methods in this class to automate the identification and addition of
* stop words found in an already existing index.
- *
- *
- *
-
+ * </p>
*/
public class QueryAutoStopWordAnalyzer extends Analyzer {
Analyzer delegate;
@@ -50,9 +50,9 @@
public static final float defaultMaxDocFreqPercent = 0.4f;
/**
- * Initializes this analyzer with the Analyzer object that actual produces the tokens
+ * Initializes this analyzer with the Analyzer object that actually produces the tokens
*
- * @param delegate The choice of analyzer that is used to produce the token stream which needs filtering
+ * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering
*/
public QueryAutoStopWordAnalyzer(Analyzer delegate) {
this.delegate = delegate;
@@ -62,7 +62,7 @@
/**
* Automatically adds stop words for all fields with terms exceeding the defaultMaxDocFreqPercent
*
- * @param reader The IndexReader class which will be consulted to identify potential stop words that
+ * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
* exceed the required document frequency
* @return The number of stop words identified.
* @throws IOException
@@ -74,7 +74,7 @@
/**
* Automatically adds stop words for all fields with terms exceeding the maxDocFreqPercent
*
- * @param reader The IndexReader class which will be consulted to identify potential stop words that
+ * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
* exceed the required document frequency
* @param maxDocFreq The maximum number of index documents which can contain a term, after which
* the term is considered to be a stop word
@@ -94,7 +94,7 @@
/**
* Automatically adds stop words for all fields with terms exceeding the maxDocFreqPercent
*
- * @param reader The IndexReader class which will be consulted to identify potential stop words that
+ * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
* exceed the required document frequency
* @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
* contain a term, after which the word is considered to be a stop word.
@@ -114,7 +114,7 @@
/**
* Automatically adds stop words for the given field with terms exceeding the maxPercentDocs
*
- * @param reader The IndexReader class which will be consulted to identify potential stop words that
+ * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
* exceed the required document frequency
* @param fieldName The field for which stopwords will be added
* @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
@@ -129,7 +129,7 @@
/**
* Automatically adds stop words for the given field with terms exceeding the maxPercentDocs
*
- * @param reader The IndexReader class which will be consulted to identify potential stop words that
+ * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
* exceed the required document frequency
* @param fieldName The field for which stopwords will be added
* @param maxDocFreq The maximum number of index documents which
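Putting the constructor and the addStopWords variants documented above together, a usage sketch (a fragment; "body" is only an example field name and 'reader' is assumed to be an open IndexReader on the target index):

  QueryAutoStopWordAnalyzer analyzer =
      new QueryAutoStopWordAnalyzer(new StandardAnalyzer());
  int added = analyzer.addStopWords(reader);        // all fields, default 0.4 threshold
  analyzer.addStopWords(reader, "body", 0.3f);      // one field, explicit percentage
  // pass 'analyzer' to query parsing so very common terms are dropped from queries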
Added: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/package.html?rev=805400&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/package.html (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/package.html Tue Aug 18 12:55:26 2009
@@ -0,0 +1,5 @@
+<html><head></head>
+<body>
+Automatically filter high-frequency stopwords.
+</body>
+</html>
Propchange: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/package.html?rev=805400&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/package.html (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/package.html Tue Aug 18 12:55:26 2009
@@ -0,0 +1,5 @@
+<html><head></head>
+<body>
+Filter to reverse token text.
+</body>
+</html>
Propchange: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/package.html
------------------------------------------------------------------------------
svn:eol-style = native