You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2009/08/02 00:52:35 UTC
svn commit: r799953 [1/4] - in /lucene/java/trunk: ./
contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/ co...
Author: buschmi
Date: Sat Aug 1 22:52:32 2009
New Revision: 799953
URL: http://svn.apache.org/viewvc?rev=799953&view=rev
Log:
LUCENE-1460: Changed TokenStreams/TokenFilters in contrib to use the new TokenStream API.
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordSegmenter.java
lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
lucene/java/trunk/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/TestSmartChineseAnalyzer.java
lucene/java/trunk/contrib/collation/src/java/org/apache/lucene/collation/CollationKeyFilter.java
lucene/java/trunk/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java
lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java
lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java
lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java
lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java
lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
lucene/java/trunk/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat Aug 1 22:52:32 2009
@@ -356,6 +356,9 @@
33. LUCENE-1705: Added IndexWriter.deleteAllDocuments. (Tim Smith via
Mike McCandless)
+34. LUCENE-1460: Changed TokenStreams/TokenFilters in contrib to
+ use the new TokenStream API. (Robert Muir, Michael Busch)
+
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java Sat Aug 1 22:52:32 2009
@@ -19,35 +19,33 @@
import java.io.IOException;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* A TokenFilter that applies {@link ArabicNormalizer} to normalize the orthography.
*
*/
-public class ArabicNormalizationFilter extends TokenFilter {
+public final class ArabicNormalizationFilter extends TokenFilter {
protected ArabicNormalizer normalizer = null;
-
+ private TermAttribute termAtt;
+
public ArabicNormalizationFilter(TokenStream input) {
super(input);
normalizer = new ArabicNormalizer();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
-
-
- public Token next(Token reusableToken) throws IOException {
- if ((reusableToken = input.next(reusableToken)) == null) {
- return null;
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt.termLength());
+ termAtt.setTermLength(newlen);
+ return true;
} else {
- int oldlen = reusableToken.termLength();
- int newlen = normalizer.normalize(reusableToken.termBuffer(), oldlen);
- if (oldlen != newlen)
- reusableToken.setTermLength(newlen);
- return reusableToken;
+ return false;
}
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java Sat Aug 1 22:52:32 2009
@@ -19,43 +19,33 @@
import java.io.IOException;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* A TokenFilter that applies {@link ArabicStemmer} to stem Arabic words..
*
*/
-public class ArabicStemFilter extends TokenFilter {
+public final class ArabicStemFilter extends TokenFilter {
protected ArabicStemmer stemmer = null;
-
+ private TermAttribute termAtt;
+
public ArabicStemFilter(TokenStream input) {
super(input);
stemmer = new ArabicStemmer();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
-
-
- /**
- * @return Returns the next token in the stream, or null at EOS
- */
- public Token next(Token reusableToken) throws IOException {
- /**
- * The actual token in the input stream.
- */
-
-
- if ((reusableToken = input.next(reusableToken)) == null) {
- return null;
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
+ termAtt.setTermLength(newlen);
+ return true;
} else {
- int oldlen = reusableToken.termLength();
- int newlen = stemmer.stem(reusableToken.termBuffer(), oldlen);
- if (oldlen != newlen)
- reusableToken.setTermLength(newlen);
- return reusableToken;
+ return false;
}
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java Sat Aug 1 22:52:32 2009
@@ -17,14 +17,13 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
import java.io.IOException;
-import java.util.HashSet;
import java.util.Set;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
/**
* Based on GermanStemFilter
*
@@ -36,10 +35,12 @@
*/
private BrazilianStemmer stemmer = null;
private Set exclusions = null;
-
+ private TermAttribute termAtt;
+
public BrazilianStemFilter(TokenStream in) {
super(in);
stemmer = new BrazilianStemmer();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
public BrazilianStemFilter(TokenStream in, Set exclusiontable) {
@@ -47,26 +48,20 @@
this.exclusions = exclusiontable;
}
- /**
- * @return Returns the next token in the stream, or null at EOS.
- */
- public final Token next(final Token reusableToken)
- throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken == null)
- return null;
-
- String term = nextToken.term();
-
- // Check the exclusion table.
- if (exclusions == null || !exclusions.contains(term)) {
- String s = stemmer.stem(term);
- // If not stemmed, don't waste the time adjusting the token.
- if ((s != null) && !s.equals(term))
- nextToken.setTermBuffer(s);
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String term = termAtt.term();
+ // Check the exclusion table.
+ if (exclusions == null || !exclusions.contains(term)) {
+ String s = stemmer.stem(term);
+ // If not stemmed, don't waste the time adjusting the token.
+ if ((s != null) && !s.equals(term))
+ termAtt.setTermBuffer(s);
+ }
+ return true;
+ } else {
+ return false;
}
- return nextToken;
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Sat Aug 1 22:52:32 2009
@@ -17,11 +17,14 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.Tokenizer;
-
+import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
/**
* CJKTokenizer was modified from StopTokenizer which does a decent job for
@@ -88,6 +91,10 @@
*/
private boolean preIsTokened = false;
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
+ private TypeAttribute typeAtt;
+
//~ Constructors -----------------------------------------------------------
/**
@@ -97,25 +104,26 @@
*/
public CJKTokenizer(Reader in) {
super(in);
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
}
//~ Methods ----------------------------------------------------------------
/**
- * Returns the next token in the stream, or null at EOS.
+ * Returns true for the next token in the stream, or false at EOS.
* See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
* for detail.
*
- * @param reusableToken a reusable token
- * @return Token
+ * @return false for end of stream, true otherwise
*
* @throws java.io.IOException - throw IOException when read error <br>
* happened in the InputStream
*
*/
- public final Token next(final Token reusableToken) throws java.io.IOException {
+ public boolean incrementToken() throws IOException {
/** how many character(s) has been stored in buffer */
- assert reusableToken != null;
while(true) { // loop until we find a non-empty token
@@ -147,7 +155,7 @@
break;
} else {
- return null;
+ return false;
}
} else {
//get current character
@@ -252,10 +260,12 @@
}
if (length > 0) {
- return reusableToken.reinit
- (buffer, 0, length, input.correctOffset(start), input.correctOffset(start+length), TOKEN_TYPE_NAMES[tokenType]);
+ termAtt.setTermBuffer(buffer, 0, length);
+ offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start+length));
+ typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]);
+ return true;
} else if (dataLen == -1) {
- return null;
+ return false;
}
// Cycle back and try for the next token (don't
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java Sat Aug 1 22:52:32 2009
@@ -17,12 +17,13 @@
* limitations under the License.
*/
+import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Title: ChineseFilter
@@ -56,19 +57,21 @@
private Map stopTable;
+ private TermAttribute termAtt;
+
public ChineseFilter(TokenStream in) {
super(in);
stopTable = new HashMap(STOP_WORDS.length);
for (int i = 0; i < STOP_WORDS.length; i++)
stopTable.put(STOP_WORDS[i], STOP_WORDS[i]);
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
- public final Token next(final Token reusableToken) throws java.io.IOException {
- assert reusableToken != null;
+ public boolean incrementToken() throws IOException {
- for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) {
- String text = nextToken.term();
+ while (input.incrementToken()) {
+ String text = termAtt.term();
// why not key off token type here assuming ChineseTokenizer comes first?
if (stopTable.get(text) == null) {
@@ -79,7 +82,7 @@
// English word/token should larger than 1 character.
if (text.length()>1) {
- return nextToken;
+ return true;
}
break;
case Character.OTHER_LETTER:
@@ -87,13 +90,13 @@
// One Chinese character as one Chinese word.
// Chinese word extraction to be added later here.
- return nextToken;
+ return true;
}
}
}
- return null;
+ return false;
}
}
\ No newline at end of file
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Sat Aug 1 22:52:32 2009
@@ -18,10 +18,12 @@
*/
+import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
@@ -56,6 +58,8 @@
public ChineseTokenizer(Reader in) {
super(in);
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
private int offset = 0, bufferIndex=0, dataLen=0;
@@ -68,7 +72,9 @@
private int length;
private int start;
-
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
+
private final void push(char c) {
if (length == 0) start = offset-1; // start of token
@@ -76,19 +82,20 @@
}
- private final Token flush(final Token token) {
+ private final boolean flush() {
if (length>0) {
//System.out.println(new String(buffer, 0,
//length));
- return token.reinit(buffer, 0, length, input.correctOffset(start), input.correctOffset(start+length));
+ termAtt.setTermBuffer(buffer, 0, length);
+ offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start+length));
+ return true;
}
else
- return null;
+ return false;
}
- public final Token next(final Token reusableToken) throws java.io.IOException {
- assert reusableToken != null;
+ public boolean incrementToken() throws IOException {
length = 0;
start = offset;
@@ -104,7 +111,7 @@
bufferIndex = 0;
}
- if (dataLen == -1) return flush(reusableToken);
+ if (dataLen == -1) return flush();
else
c = ioBuffer[bufferIndex++];
@@ -115,20 +122,20 @@
case Character.LOWERCASE_LETTER:
case Character.UPPERCASE_LETTER:
push(c);
- if (length == MAX_WORD_LEN) return flush(reusableToken);
+ if (length == MAX_WORD_LEN) return flush();
break;
case Character.OTHER_LETTER:
if (length>0) {
bufferIndex--;
offset--;
- return flush(reusableToken);
+ return flush();
}
push(c);
- return flush(reusableToken);
+ return flush();
default:
- if (length>0) return flush(reusableToken);
+ if (length>0) return flush();
break;
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Sat Aug 1 22:52:32 2009
@@ -28,6 +28,12 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
* Base class for decomposition token filters.
@@ -54,6 +60,15 @@
protected final int minSubwordSize;
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
+
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
+ private FlagsAttribute flagsAtt;
+ private PositionIncrementAttribute posIncAtt;
+ private TypeAttribute typeAtt;
+ private PayloadAttribute payloadAtt;
+
+ private final Token wrapper = new Token();
protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
this(input,makeDictionary(dictionary),minWordSize,minSubwordSize,maxSubwordSize, onlyLongestMatch);
@@ -90,6 +105,13 @@
this.dictionary = new CharArraySet(dictionary.size(), false);
addAllLowerCase(this.dictionary, dictionary);
}
+
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
+ posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
}
/**
@@ -105,26 +127,54 @@
return dict;
}
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ private final void setToken(final Token token) throws IOException {
+ termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ flagsAtt.setFlags(token.getFlags());
+ typeAtt.setType(token.type());
+ offsetAtt.setOffset(token.startOffset(), token.endOffset());
+ posIncAtt.setPositionIncrement(token.getPositionIncrement());
+ payloadAtt.setPayload(token.getPayload());
+ }
+
+ public final boolean incrementToken() throws IOException {
if (tokens.size() > 0) {
- return (Token)tokens.removeFirst();
- }
-
- Token nextToken = input.next(reusableToken);
- if (nextToken == null) {
- return null;
+ setToken((Token)tokens.removeFirst());
+ return true;
}
- decompose(nextToken);
+ if (input.incrementToken() == false)
+ return false;
+
+ wrapper.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ wrapper.setStartOffset(offsetAtt.startOffset());
+ wrapper.setEndOffset(offsetAtt.endOffset());
+ wrapper.setFlags(flagsAtt.getFlags());
+ wrapper.setType(typeAtt.type());
+ wrapper.setPositionIncrement(posIncAtt.getPositionIncrement());
+ wrapper.setPayload(payloadAtt.getPayload());
+
+ decompose(wrapper);
if (tokens.size() > 0) {
- return (Token)tokens.removeFirst();
+ setToken((Token)tokens.removeFirst());
+ return true;
} else {
- return null;
+ return false;
}
}
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
+ }
+
protected static final void addAllLowerCase(Set target, Collection col) {
Iterator iter=col.iterator();
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java Sat Aug 1 22:52:32 2009
@@ -17,13 +17,13 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
import java.io.IOException;
import java.util.Set;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
/**
* A filter that stems German words. It supports a table of words that should
* not be stemmed at all. The stemmer used can be changed at runtime after the
@@ -40,10 +40,13 @@
private GermanStemmer stemmer = null;
private Set exclusionSet = null;
+ private TermAttribute termAtt;
+
public GermanStemFilter( TokenStream in )
{
super(in);
stemmer = new GermanStemmer();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
@@ -56,26 +59,22 @@
}
/**
- * @return Returns the next token in the stream, or null at EOS
+ * @return Returns true for next token in the stream, or false at EOS
*/
- public final Token next(final Token reusableToken)
- throws IOException
- {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
-
- if (nextToken == null)
- return null;
-
- String term = nextToken.term();
- // Check the exclusion table.
- if (exclusionSet == null || !exclusionSet.contains(term)) {
- String s = stemmer.stem(term);
- // If not stemmed, don't waste the time adjusting the token.
- if ((s != null) && !s.equals(term))
- nextToken.setTermBuffer(s);
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String term = termAtt.term();
+ // Check the exclusion table.
+ if (exclusionSet == null || !exclusionSet.contains(term)) {
+ String s = stemmer.stem(term);
+ // If not stemmed, don't waste the time adjusting the token.
+ if ((s != null) && !s.equals(term))
+ termAtt.setTermBuffer(s);
+ }
+ return true;
+ } else {
+ return false;
}
- return nextToken;
}
/**
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Sat Aug 1 22:52:32 2009
@@ -16,9 +16,11 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Normalizes token text to lower case, analyzing given ("greek") charset.
@@ -28,26 +30,26 @@
{
char[] charset;
+ private TermAttribute termAtt;
+
public GreekLowerCaseFilter(TokenStream in, char[] charset)
{
super(in);
this.charset = charset;
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
- public final Token next(final Token reusableToken) throws java.io.IOException
- {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
-
- if (nextToken == null)
- return null;
-
- char[] chArray = nextToken.termBuffer();
- int chLen = nextToken.termLength();
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ char[] chArray = termAtt.termBuffer();
+ int chLen = termAtt.termLength();
for (int i = 0; i < chLen; i++)
{
- chArray[i] = GreekCharsets.toLowerCase(chArray[i], charset);
+ chArray[i] = GreekCharsets.toLowerCase(chArray[i], charset);
}
- return nextToken;
+ return true;
+ } else {
+ return false;
+ }
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Sat Aug 1 22:52:32 2009
@@ -25,6 +25,7 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Removes elisions from a token stream. For example, "l'avion" (the plane) will be
@@ -36,7 +37,8 @@
*/
public class ElisionFilter extends TokenFilter {
private Set articles = null;
-
+ private TermAttribute termAtt;
+
private static char[] apostrophes = {'\'', '’'};
public void setArticles(Set articles) {
@@ -54,6 +56,7 @@
super(input);
this.articles = new HashSet(Arrays.asList(new String[] { "l", "m", "t",
"qu", "n", "s", "j" }));
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
@@ -62,6 +65,7 @@
public ElisionFilter(TokenStream input, Set articles) {
super(input);
setArticles(articles);
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
@@ -70,39 +74,50 @@
public ElisionFilter(TokenStream input, String[] articles) {
super(input);
setArticles(new HashSet(Arrays.asList(articles)));
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
* Returns the next input Token with term() without elisioned start
*/
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken == null)
- return null;
-
- char[] termBuffer = nextToken.termBuffer();
- int termLength = nextToken.termLength();
-
- int minPoz = Integer.MAX_VALUE;
- for (int i = 0; i < apostrophes.length; i++) {
- char apos = apostrophes[i];
- // The equivalent of String.indexOf(ch)
- for (int poz = 0; poz < termLength ; poz++) {
- if (termBuffer[poz] == apos) {
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ char[] termBuffer = termAtt.termBuffer();
+ int termLength = termAtt.termLength();
+
+ int minPoz = Integer.MAX_VALUE;
+ for (int i = 0; i < apostrophes.length; i++) {
+ char apos = apostrophes[i];
+ // The equivalent of String.indexOf(ch)
+ for (int poz = 0; poz < termLength ; poz++) {
+ if (termBuffer[poz] == apos) {
minPoz = Math.min(poz, minPoz);
break;
+ }
}
}
- }
- // An apostrophe has been found. If the prefix is an article strip it off.
- if (minPoz != Integer.MAX_VALUE
- && articles.contains(new String(nextToken.termBuffer(), 0, minPoz).toLowerCase())) {
- nextToken.setTermBuffer(nextToken.termBuffer(), minPoz + 1, nextToken.termLength() - (minPoz + 1));
- }
+ // An apostrophe has been found. If the prefix is an article strip it off.
+ if (minPoz != Integer.MAX_VALUE
+ && articles.contains(new String(termAtt.termBuffer(), 0, minPoz).toLowerCase())) {
+ termAtt.setTermBuffer(termAtt.termBuffer(), minPoz + 1, termAtt.termLength() - (minPoz + 1));
+ }
- return nextToken;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
}
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
+ }
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java Sat Aug 1 22:52:32 2009
@@ -20,6 +20,7 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
import java.util.HashSet;
@@ -39,10 +40,13 @@
*/
private FrenchStemmer stemmer = null;
private Set exclusions = null;
+
+ private TermAttribute termAtt;
public FrenchStemFilter( TokenStream in ) {
super(in);
stemmer = new FrenchStemmer();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
@@ -52,25 +56,23 @@
}
/**
- * @return Returns the next token in the stream, or null at EOS
+ * @return Returns true for the next token in the stream, or false at EOS
*/
- public final Token next(final Token reusableToken)
- throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken == null)
- return null;
-
- String term = nextToken.term();
-
- // Check the exclusion table
- if ( exclusions == null || !exclusions.contains( term ) ) {
- String s = stemmer.stem( term );
- // If not stemmed, don't waste the time adjusting the token.
- if ((s != null) && !s.equals( term ) )
- nextToken.setTermBuffer(s);
- }
- return nextToken;
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String term = termAtt.term();
+
+ // Check the exclusion table
+ if ( exclusions == null || !exclusions.contains( term ) ) {
+ String s = stemmer.stem( term );
+ // If not stemmed, don't waste the time adjusting the token.
+ if ((s != null) && !s.equals( term ) )
+ termAtt.setTermBuffer(s);
+ }
+ return true;
+ } else {
+ return false;
+ }
}
/**
* Set a alternative/custom FrenchStemmer for this filter.
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java Sat Aug 1 22:52:32 2009
@@ -27,8 +27,19 @@
*/
public class EmptyTokenStream extends TokenStream {
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- return null;
+ public final boolean incrementToken() throws IOException {
+ return false;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -24,6 +24,7 @@
/**
* Links two PrefixAwareTokenFilter
+ * @deprecated
*/
public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -29,6 +29,7 @@
* to be used when updating the token values in the second stream based on that token.
*
* The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+ * @deprecated
*/
public class PrefixAwareTokenFilter extends TokenStream {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java Sat Aug 1 22:52:32 2009
@@ -17,10 +17,16 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-
-import java.io.IOException;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
* A token stream containing a single token.
@@ -29,34 +35,66 @@
private boolean exhausted = false;
// The token needs to be immutable, so work with clones!
- private Token token;
+ private Token singleToken;
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
+ private FlagsAttribute flagsAtt;
+ private PositionIncrementAttribute posIncAtt;
+ private TypeAttribute typeAtt;
+ private PayloadAttribute payloadAtt;
public SingleTokenTokenStream(Token token) {
assert token != null;
- this.token = (Token) token.clone();
+ this.singleToken = (Token) token.clone();
+
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
+ posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
}
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ public final boolean incrementToken() throws IOException {
if (exhausted) {
- return null;
+ return false;
}
+
+ Token clone = (Token) singleToken.clone();
+
+ termAtt.setTermBuffer(clone.termBuffer(), 0, clone.termLength());
+ offsetAtt.setOffset(clone.startOffset(), clone.endOffset());
+ flagsAtt.setFlags(clone.getFlags());
+ typeAtt.setType(clone.type());
+ posIncAtt.setPositionIncrement(clone.getPositionIncrement());
+ payloadAtt.setPayload(clone.getPayload());
exhausted = true;
- return (Token) token.clone();
+ return true;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
}
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
+ }
public void reset() throws IOException {
exhausted = false;
}
public Token getToken() {
- return (Token) token.clone();
+ return (Token) singleToken.clone();
}
public void setToken(Token token) {
- this.token = (Token) token.clone();
+ this.singleToken = (Token) token.clone();
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -20,9 +20,10 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
-import java.util.LinkedList;
/**
* Tokenizes the given token into n-grams of given size(s).
@@ -66,11 +67,18 @@
private int minGram;
private int maxGram;
private Side side;
- private LinkedList ngrams;
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curGramSize;
+
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
+
protected EdgeNGramTokenFilter(TokenStream input) {
super(input);
- this.ngrams = new LinkedList();
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
/**
@@ -99,7 +107,8 @@
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
- this.ngrams = new LinkedList();
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
/**
@@ -114,54 +123,42 @@
this(input, Side.getSide(sideLabel), minGram, maxGram);
}
- /** Returns the next token in the stream, or null at EOS. */
- public final Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- if (!ngrams.isEmpty()) {
- return (Token)ngrams.removeFirst();
- }
-
- Token token = null;
-
- while (ngrams.isEmpty() && (token = input.next()) != null) {
- ngram(token);
- }
-
- if (token == null) {
- return null;
- }
-
- if (!ngrams.isEmpty()) {
- return (Token)ngrams.removeFirst();
- } else {
- return null;
- }
- }
-
- private void ngram(final Token token) {
- int termLength = token.termLength();
- char[] termBuffer = token.termBuffer();
- int gramSize = minGram;
- while (gramSize <= maxGram) {
- // if the remaining input is too short, we can't generate any n-grams
- if (gramSize > termLength) {
- return;
+ public final boolean incrementToken() throws IOException {
+ while (true) {
+ if (curTermBuffer == null) {
+ if (!input.incrementToken()) {
+ return false;
+ } else {
+ curTermBuffer = (char[]) termAtt.termBuffer().clone();
+ curTermLength = termAtt.termLength();
+ curGramSize = minGram;
+ }
}
-
- // if we have hit the end of our n-gram size range, quit
- if (gramSize > maxGram) {
- return;
+ if (curGramSize <= maxGram) {
+ if (! (curGramSize > curTermLength // if the remaining input is too short, we can't generate any n-grams
+ || curGramSize > maxGram)) { // if we have hit the end of our n-gram size range, quit
+ // grab gramSize chars from front or back
+ int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
+ int end = start + curGramSize;
+ offsetAtt.setOffset(start, end);
+ termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
+ curGramSize++;
+ return true;
+ }
}
-
- // grab gramSize chars from front or back
- int start = side == Side.FRONT ? 0 : termLength - gramSize;
- int end = start + gramSize;
- Token tok = (Token) token.clone();
- tok.setStartOffset(start);
- tok.setEndOffset(end);
- tok.setTermBuffer(termBuffer, start, gramSize);
- ngrams.add(tok);
- gramSize++;
+ curTermBuffer = null;
}
}
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
+ }
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Sat Aug 1 22:52:32 2009
@@ -20,6 +20,8 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
import java.io.Reader;
@@ -35,6 +37,9 @@
public static final Side DEFAULT_SIDE = Side.FRONT;
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
// Replace this with an enum when the Java 1.5 upgrade is made, the impl will be simplified
/** Specifies which side of the input the n-gram should be generated from */
@@ -100,6 +105,9 @@
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
+
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
/**
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
@@ -114,8 +122,7 @@
}
/** Returns the next token in the stream, or null at EOS. */
- public final Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ public final boolean incrementToken() throws IOException {
// if we are just starting, read the whole input
if (!started) {
started = true;
@@ -128,21 +135,32 @@
// if the remaining input is too short, we can't generate any n-grams
if (gramSize > inLen) {
- return null;
+ return false;
}
// if we have hit the end of our n-gram size range, quit
if (gramSize > maxGram) {
- return null;
+ return false;
}
// grab gramSize chars from front or back
int start = side == Side.FRONT ? 0 : inLen - gramSize;
int end = start + gramSize;
- reusableToken.setTermBuffer(inStr, start, gramSize);
- reusableToken.setStartOffset(input.correctOffset(start));
- reusableToken.setEndOffset(input.correctOffset(end));
+ termAtt.setTermBuffer(inStr, start, gramSize);
+ offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(end));
gramSize++;
- return reusableToken;
+ return true;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -17,12 +17,13 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-
-import java.io.IOException;
-import java.util.LinkedList;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Tokenizes the input into n-grams of the given size(s).
@@ -32,7 +33,14 @@
public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
private int minGram, maxGram;
- private LinkedList ngrams;
+
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curGramSize;
+ private int curPos;
+
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
/**
* Creates NGramTokenFilter with given min and max n-grams.
@@ -50,7 +58,9 @@
}
this.minGram = minGram;
this.maxGram = maxGram;
- this.ngrams = new LinkedList();
+
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
/**
@@ -62,40 +72,41 @@
}
/** Returns the next token in the stream, or null at EOS. */
- public final Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- if (!ngrams.isEmpty()) {
- return (Token)ngrams.removeFirst();
- }
-
- Token token = null;
-
- while (ngrams.isEmpty() && (token = input.next()) != null) {
- ngram(token);
- }
-
- if (token == null) {
- return null;
- }
-
- if (!ngrams.isEmpty()) {
- return (Token)ngrams.removeFirst();
- } else {
- return null;
+ public final boolean incrementToken() throws IOException {
+ while (true) {
+ if (curTermBuffer == null) {
+ if (!input.incrementToken()) {
+ return false;
+ } else {
+ curTermBuffer = (char[]) termAtt.termBuffer().clone();
+ curTermLength = termAtt.termLength();
+ curGramSize = minGram;
+ curPos = 0;
+ }
+ }
+ while (curGramSize <= maxGram) {
+ while (curPos+curGramSize <= curTermLength) { // while there is input
+ termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
+ offsetAtt.setOffset(curPos, curPos+curGramSize);
+ curPos++;
+ return true;
+ }
+ curGramSize++; // increase n-gram size
+ curPos = 0;
+ }
+ curTermBuffer = null;
}
}
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
- private void ngram(Token token) {
- char[] termBuffer = token.termBuffer();
- int termLength = token.termLength();
- int gramSize = minGram;
- while (gramSize <= maxGram) {
- int pos = 0; // reset to beginning of string
- while (pos+gramSize <= termLength) { // while there is input
- ngrams.add(token.clone(termBuffer, pos, gramSize, pos, pos+gramSize));
- pos++;
- }
- gramSize++; // increase n-gram size
- }
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Sat Aug 1 22:52:32 2009
@@ -19,6 +19,8 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
import java.io.Reader;
@@ -36,6 +38,9 @@
private int inLen;
private String inStr;
private boolean started = false;
+
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
/**
* Creates NGramTokenizer with given min and max n-grams.
@@ -53,6 +58,9 @@
}
this.minGram = minGram;
this.maxGram = maxGram;
+
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
/**
* Creates NGramTokenizer with default min and max n-grams.
@@ -63,8 +71,7 @@
}
/** Returns the next token in the stream, or null at EOS. */
- public final Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ public final boolean incrementToken() throws IOException {
if (!started) {
started = true;
gramSize = minGram;
@@ -78,13 +85,27 @@
pos = 0; // reset to beginning of string
gramSize++; // increase n-gram size
if (gramSize > maxGram) // we are done
- return null;
+ return false;
if (pos+gramSize > inLen)
- return null;
+ return false;
}
int oldPos = pos;
pos++;
- return reusableToken.reinit(inStr, oldPos, gramSize, input.correctOffset(oldPos), input.correctOffset(oldPos+gramSize));
+ termAtt.setTermBuffer(inStr, oldPos, gramSize);
+ offsetAtt.setOffset(input.correctOffset(oldPos), input.correctOffset(oldPos+gramSize));
+ return true;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java Sat Aug 1 22:52:32 2009
@@ -17,15 +17,15 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Set;
import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* A filter that stems Dutch words. It supports a table of words that should
@@ -39,10 +39,13 @@
*/
private DutchStemmer stemmer = null;
private Set exclusions = null;
+
+ private TermAttribute termAtt;
public DutchStemFilter(TokenStream _in) {
super(_in);
stemmer = new DutchStemmer();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
@@ -62,24 +65,23 @@
}
/**
- * @return Returns the next token in the stream, or null at EOS
+ * Returns true for the next token in the stream, or false at EOS
*/
- public Token next(Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken == null)
- return null;
-
- String term = nextToken.term();
-
- // Check the exclusion table.
- if (exclusions == null || !exclusions.contains(term)) {
- String s = stemmer.stem(term);
- // If not stemmed, don't waste the time adjusting the token.
- if ((s != null) && !s.equals(term))
- nextToken.setTermBuffer(s);
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String term = termAtt.term();
+
+ // Check the exclusion table.
+ if (exclusions == null || !exclusions.contains(term)) {
+ String s = stemmer.stem(term);
+ // If not stemmed, don't waste the time adjusting the token.
+ if ((s != null) && !s.equals(term))
+ termAtt.setTermBuffer(s);
+ }
+ return true;
+ } else {
+ return false;
}
- return nextToken;
}
/**
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -16,14 +16,13 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import java.io.IOException;
-
/**
* Characters before the delimiter are the "token", those after are the payload.
@@ -37,7 +36,7 @@
*
* @see PayloadEncoder
*/
-public class DelimitedPayloadTokenFilter extends TokenFilter {
+public final class DelimitedPayloadTokenFilter extends TokenFilter {
public static final char DEFAULT_DELIMITER = '|';
protected char delimiter = DEFAULT_DELIMITER;
protected TermAttribute termAtt;
@@ -83,27 +82,4 @@
}
return result;
}
-
-
- public Token next(Token reusableToken) throws IOException {
- Token result = input.next(reusableToken);
- if (result != null) {
- final char[] buffer = result.termBuffer();
- final int length = result.termLength();
- boolean seen = false;
- for (int i = 0; i < length; i++) {
- if (buffer[i] == delimiter) {
- result.setTermBuffer(buffer, 0, i);
- result.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
- seen = true;
- break;//at this point, we know the whole piece, so we can exit. If we don't see the delimiter, then the termAtt is the same
- }
- }
- if (seen == false) {
- //no delimiter
- payAtt.setPayload(null);
- }
- }
- return result;
- }
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -20,6 +20,8 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
import java.io.IOException;
@@ -34,19 +36,37 @@
private String typeMatch;
private Payload thePayload;
+ private PayloadAttribute payloadAtt;
+ private TypeAttribute typeAtt;
+
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
//Need to encode the payload
thePayload = new Payload(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
+ payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
}
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken != null && nextToken.type().equals(typeMatch)){
- nextToken.setPayload(thePayload);
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ if (typeAtt.type().equals(typeMatch))
+ payloadAtt.setPayload(thePayload);
+ return true;
+ } else {
+ return false;
}
- return nextToken;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -17,13 +17,15 @@
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
-import java.io.IOException;
-
/**
* Adds the {@link org.apache.lucene.analysis.Token#setStartOffset(int)}
@@ -32,22 +34,37 @@
*
**/
public class TokenOffsetPayloadTokenFilter extends TokenFilter {
-
+ protected OffsetAttribute offsetAtt;
+ protected PayloadAttribute payAtt;
public TokenOffsetPayloadTokenFilter(TokenStream input) {
super(input);
+ offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
}
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken != null){
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
byte[] data = new byte[8];
- PayloadHelper.encodeInt(nextToken.startOffset(), data, 0);
- PayloadHelper.encodeInt(nextToken.endOffset(), data, 4);
+ PayloadHelper.encodeInt(offsetAtt.startOffset(), data, 0);
+ PayloadHelper.encodeInt(offsetAtt.endOffset(), data, 4);
Payload payload = new Payload(data);
- nextToken.setPayload(payload);
+ payAtt.setPayload(payload);
+ return true;
+ } else {
+ return false;
}
- return nextToken;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
\ No newline at end of file
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -20,6 +20,8 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
import java.io.IOException;
@@ -32,19 +34,37 @@
*
**/
public class TypeAsPayloadTokenFilter extends TokenFilter {
+ private PayloadAttribute payloadAtt;
+ private TypeAttribute typeAtt;
public TypeAsPayloadTokenFilter(TokenStream input) {
super(input);
-
+ payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
}
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken != null && nextToken.type() != null && nextToken.type().equals("") == false){
- nextToken.setPayload(new Payload(nextToken.type().getBytes("UTF-8")));
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String type = typeAtt.type();
+ if (type != null && type.equals("") == false) {
+ payloadAtt.setPayload(new Payload(type.getBytes("UTF-8")));
+ }
+ return true;
+ } else {
+ return false;
}
- return nextToken;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
\ No newline at end of file
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java Sat Aug 1 22:52:32 2009
@@ -22,6 +22,7 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/** Set the positionIncrement of all tokens to the "positionIncrement",
* except the first return token which retains its original positionIncrement value.
@@ -34,6 +35,8 @@
/** The first token must have non-zero positionIncrement **/
private boolean firstTokenPositioned = false;
+
+ private PositionIncrementAttribute posIncrAtt;
/**
* Constructs a PositionFilter that assigns a position increment of zero to
@@ -43,6 +46,7 @@
*/
public PositionFilter(final TokenStream input) {
super(input);
+ posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
}
/**
@@ -58,18 +62,29 @@
this.positionIncrement = positionIncrement;
}
- public Token next(Token reusableToken) throws IOException {
-
- assert reusableToken != null;
- reusableToken = input.next(reusableToken);
- if (null != reusableToken) {
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
if (firstTokenPositioned) {
- reusableToken.setPositionIncrement(positionIncrement);
+ posIncrAtt.setPositionIncrement(positionIncrement);
} else {
firstTokenPositioned = true;
}
+ return true;
+ } else {
+ return false;
}
- return reusableToken;
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
public void reset() throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java Sat Aug 1 22:52:32 2009
@@ -19,7 +19,7 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
@@ -30,16 +30,20 @@
*/
public final class ReverseStringFilter extends TokenFilter {
+ private TermAttribute termAtt;
+
public ReverseStringFilter(TokenStream in) {
super(in);
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
- public final Token next(Token in) throws IOException {
- assert in != null;
- Token token=input.next(in);
- if( token == null ) return null;
- reverse( token.termBuffer(), token.termLength() );
- return token;
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ reverse( termAtt.termBuffer(), termAtt.termLength() );
+ return true;
+ } else {
+ return false;
+ }
}
public static String reverse( final String input ){
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java Sat Aug 1 22:52:32 2009
@@ -17,9 +17,12 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Normalizes token text to lower case, analyzing given ("russian") charset.
@@ -31,26 +34,27 @@
{
char[] charset;
+ private TermAttribute termAtt;
+
public RussianLowerCaseFilter(TokenStream in, char[] charset)
{
super(in);
this.charset = charset;
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
- public final Token next(final Token reusableToken) throws java.io.IOException
+ public final boolean incrementToken() throws IOException
{
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
-
- if (nextToken == null)
- return null;
-
- char[] chArray = nextToken.termBuffer();
- int chLen = nextToken.termLength();
+ if (input.incrementToken()) {
+ char[] chArray = termAtt.termBuffer();
+ int chLen = termAtt.termLength();
for (int i = 0; i < chLen; i++)
{
- chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset);
+ chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset);
}
- return nextToken;
+ return true;
+ } else {
+ return false;
+ }
}
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java Sat Aug 1 22:52:32 2009
@@ -20,6 +20,8 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
import java.io.IOException;
/**
@@ -37,29 +39,32 @@
*/
private RussianStemmer stemmer = null;
+ private TermAttribute termAtt;
+
public RussianStemFilter(TokenStream in, char[] charset)
{
super(in);
stemmer = new RussianStemmer(charset);
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
- * @return Returns the next token in the stream, or null at EOS
+ * Advances to the next token in the stream; returns false at EOS
*/
- public final Token next(final Token reusableToken) throws IOException
+ public final boolean incrementToken() throws IOException
{
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken == null)
- return null;
-
- String term = nextToken.term();
+ if (input.incrementToken()) {
+ String term = termAtt.term();
String s = stemmer.stem(term);
if (s != null && !s.equals(term))
- nextToken.setTermBuffer(s);
- return nextToken;
+ termAtt.setTermBuffer(s);
+ return true;
+ } else {
+ return false;
+ }
}
+
/**
* Set an alternative/custom RussianStemmer for this filter.
*/