You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2009/08/02 00:52:35 UTC
svn commit: r799953 [3/4] - in /lucene/java/trunk: ./
contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/ co...
Modified: lucene/java/trunk/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/java/trunk/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Sat Aug 1 22:52:32 2009
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;
import java.io.IOException;
@@ -69,9 +70,10 @@
* java.text.Collator over several languages.
* </p>
*/
-public class ICUCollationKeyFilter extends TokenFilter {
+public final class ICUCollationKeyFilter extends TokenFilter {
private Collator collator = null;
private RawCollationKey reusableKey = new RawCollationKey();
+ private TermAttribute termAtt;
/**
*
@@ -81,25 +83,26 @@
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
super(input);
this.collator = collator;
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
- public final Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken != null) {
- char[] termBuffer = nextToken.termBuffer();
- String termText = new String(termBuffer, 0, nextToken.termLength());
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ char[] termBuffer = termAtt.termBuffer();
+ String termText = new String(termBuffer, 0, termAtt.termLength());
collator.getRawCollationKey(termText, reusableKey);
ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
int encodedLength
= IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
if (encodedLength > termBuffer.length) {
- nextToken.resizeTermBuffer(encodedLength);
+ termAtt.resizeTermBuffer(encodedLength);
}
- nextToken.setTermLength(encodedLength);
- CharBuffer wrappedTermBuffer = CharBuffer.wrap(nextToken.termBuffer());
+ termAtt.setTermLength(encodedLength);
+ CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+ return true;
+ } else {
+ return false;
}
- return nextToken;
}
}
Modified: lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Sat Aug 1 22:52:32 2009
@@ -28,6 +28,8 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -193,11 +195,15 @@
ch = 0;
}
- public Token next( Token reusableToken ) throws IOException {
+ TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ public boolean incrementToken() throws IOException {
if( !getNextPartialSnippet() )
- return null;
- reusableToken.reinit( snippet, startTerm, lenTerm, startOffset, startOffset + lenTerm );
- return reusableToken;
+ return false;
+
+ termAtt.setTermBuffer(snippet, startTerm, lenTerm);
+ offsetAtt.setOffset(startOffset, startOffset + lenTerm);
+ return true;
}
public int getFinalOffset() {
Modified: lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java (original)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java Sat Aug 1 22:52:32 2009
@@ -295,14 +295,21 @@
public TokenArrayAnalyzer( Token... tokens ){
this.tokens = tokens;
}
+
public TokenStream tokenStream(String fieldName, Reader reader) {
- return new TokenStream(){
+ final Token reusableToken = new Token();
+
+ TokenStream.setOnlyUseNewAPI(true);
+ TokenStream ts = new TokenStream(){
int p = 0;
- public Token next( Token reusableToken ) throws IOException {
- if( p >= tokens.length ) return null;
- return tokens[p++];
+ public boolean incrementToken() throws IOException {
+ if( p >= tokens.length ) return false;
+ tokens[p++].copyTo(reusableToken);
+ return true;
}
};
+ ts.addAttributeImpl(reusableToken);
+ return ts;
}
}
}
Modified: lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Sat Aug 1 22:52:32 2009
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -44,6 +45,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.util.AttributeSource;
/**
* Asserts equality of content and behaviour of two index readers.
@@ -175,23 +177,26 @@
t.setPayload(new Payload(new byte[]{2}));
tokens.add(t);
tokens.add(createToken("fin", 7, 9));
- document.add(new Field("f", new TokenStream() {
+ final Token reusableToken = new Token();
+ TokenStream ts = new TokenStream() {
Iterator<Token> it = tokens.iterator();
-
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+
+ public final boolean incrementToken() throws IOException {
if (!it.hasNext()) {
- return null;
+ return false;
}
- // Resettable token streams need to return clones.
- Token nextToken = (Token) it.next();
- return (Token) nextToken.clone();
+
+ reusableToken.reinit(it.next());
+ return true;
}
public void reset() throws IOException {
it = tokens.iterator();
}
- }));
+ };
+ ts.addAttributeImpl(reusableToken);
+
+ document.add(new Field("f", ts));
}
}
}
Modified: lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java (original)
+++ lucene/java/trunk/contrib/lucli/src/java/lucli/LuceneMethods.java Sat Aug 1 22:52:32 2009
@@ -75,6 +75,8 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -317,11 +319,14 @@
int position = 0;
// Tokenize field and add to postingTable
TokenStream stream = analyzer.tokenStream(fieldName, reader);
+ TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
+ PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);
+
try {
- for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
- position += (nextToken.getPositionIncrement() - 1);
+ while (stream.incrementToken()) {
+ position += (posIncrAtt.getPositionIncrement() - 1);
position++;
- String name = nextToken.term();
+ String name = termAtt.term();
Integer Count = (Integer) tokenMap.get(name);
if (Count == null) { // not in there yet
tokenMap.put(name, new Integer(1)); //first one
Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java Sat Aug 1 22:52:32 2009
@@ -31,9 +31,13 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PorterStemFilter;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
/**
* Various fulltext analysis utilities avoiding redundant code in several
@@ -71,21 +75,24 @@
public TokenStream tokenStream(final String fieldName, Reader reader) {
return new TokenFilter(child.tokenStream(fieldName, reader)) {
private int position = -1;
-
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken); // from filter super class
- log.println(toString(nextToken));
- return nextToken;
+ private TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ private PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ private OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ private TypeAttribute typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+
+ public boolean incrementToken() throws IOException {
+ boolean hasNext = input.incrementToken();
+ log.println(toString(hasNext));
+ return hasNext;
}
- private String toString(Token token) {
- if (token == null) return "[" + logName + ":EOS:" + fieldName + "]\n";
+ private String toString(boolean hasNext) {
+ if (!hasNext) return "[" + logName + ":EOS:" + fieldName + "]\n";
- position += token.getPositionIncrement();
+ position += posIncrAtt.getPositionIncrement();
return "[" + logName + ":" + position + ":" + fieldName + ":"
- + token.term() + ":" + token.startOffset()
- + "-" + token.endOffset() + ":" + token.type()
+ + termAtt.term() + ":" + offsetAtt.startOffset()
+ + "-" + offsetAtt.endOffset() + ":" + typeAtt.type()
+ "]";
}
};
@@ -121,9 +128,8 @@
return new TokenFilter(child.tokenStream(fieldName, reader)) {
private int todo = maxTokens;
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- return --todo >= 0 ? input.next(reusableToken) : null;
+ public boolean incrementToken() throws IOException {
+ return --todo >= 0 ? input.incrementToken() : false;
}
};
}
@@ -240,11 +246,10 @@
final ArrayList tokens2 = new ArrayList();
TokenStream tokenStream = new TokenFilter(child.tokenStream(fieldName, reader)) {
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken); // from filter super class
- if (nextToken != null) tokens2.add(nextToken.clone());
- return nextToken;
+ public boolean incrementToken() throws IOException {
+ boolean hasNext = input.incrementToken();
+ if (hasNext) tokens2.add(captureState());
+ return hasNext;
}
};
@@ -255,10 +260,10 @@
private Iterator iter = tokens.iterator();
- public Token next(Token token) {
- assert token != null;
- if (!iter.hasNext()) return null;
- return (Token) iter.next();
+ public boolean incrementToken() {
+ if (!iter.hasNext()) return false;
+ restoreState((AttributeSource.State) iter.next());
+ return true;
}
};
}
@@ -302,13 +307,13 @@
// compute frequencies of distinct terms
HashMap map = new HashMap();
TokenStream stream = analyzer.tokenStream("", new StringReader(text));
+ TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
try {
- final Token reusableToken = new Token();
- for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
- MutableInteger freq = (MutableInteger) map.get(nextToken.term());
+ while (stream.incrementToken()) {
+ MutableInteger freq = (MutableInteger) map.get(termAtt.term());
if (freq == null) {
freq = new MutableInteger(1);
- map.put(nextToken.term(), freq);
+ map.put(termAtt.term(), freq);
} else {
freq.setValue(freq.intValue() + 1);
}
Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Sat Aug 1 22:52:32 2009
@@ -28,8 +28,10 @@
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
@@ -274,18 +276,21 @@
return new TokenStream() {
private Iterator iter = keywords.iterator();
private int start = 0;
- public Token next(final Token reusableToken) {
- assert reusableToken != null;
- if (!iter.hasNext()) return null;
+ private TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ private OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+
+ public boolean incrementToken() {
+ if (!iter.hasNext()) return false;
Object obj = iter.next();
if (obj == null)
throw new IllegalArgumentException("keyword must not be null");
String term = obj.toString();
- reusableToken.reinit(term, start, start+reusableToken.termLength());
+ termAtt.setTermBuffer(term);
+ offsetAtt.setOffset(start, start+termAtt.termLength());
start += term.length() + 1; // separate words by 1 (blank) character
- return reusableToken;
+ return true;
}
};
}
@@ -350,13 +355,17 @@
int numTokens = 0;
int numOverlapTokens = 0;
int pos = -1;
- final Token reusableToken = new Token();
- for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
- String term = nextToken.term();
+
+ TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
+ PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);
+ OffsetAttribute offsetAtt = (OffsetAttribute) stream.addAttribute(OffsetAttribute.class);
+
+ while (stream.incrementToken()) {
+ String term = termAtt.term();
if (term.length() == 0) continue; // nothing to do
// if (DEBUG) System.err.println("token='" + term + "'");
numTokens++;
- final int posIncr = nextToken.getPositionIncrement();
+ final int posIncr = posIncrAttribute.getPositionIncrement();
if (posIncr == 0)
numOverlapTokens++;
pos += posIncr;
@@ -369,7 +378,7 @@
if (stride == 1) {
positions.add(pos);
} else {
- positions.add(pos, nextToken.startOffset(), nextToken.endOffset());
+ positions.add(pos, offsetAtt.startOffset(), offsetAtt.endOffset());
}
}
Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java Sat Aug 1 22:52:32 2009
@@ -30,8 +30,9 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
@@ -331,6 +332,8 @@
private Matcher matcher;
private int pos = 0;
private static final Locale locale = Locale.getDefault();
+ private TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ private OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) {
this.str = str;
@@ -338,9 +341,8 @@
this.toLowerCase = toLowerCase;
}
- public Token next(final Token reusableToken) {
- assert reusableToken != null;
- if (matcher == null) return null;
+ public final boolean incrementToken() {
+ if (matcher == null) return false;
while (true) { // loop takes care of leading and trailing boundary cases
int start = pos;
@@ -357,9 +359,11 @@
if (start != end) { // non-empty match (header/trailer)
String text = str.substring(start, end);
if (toLowerCase) text = text.toLowerCase(locale);
- return reusableToken.reinit(text, start, end);
+ termAtt.setTermBuffer(text);
+ offsetAtt.setOffset(start, end);
+ return true;
}
- if (!isMatch) return null;
+ if (!isMatch) return false;
}
}
@@ -381,6 +385,8 @@
private final boolean toLowerCase;
private final Set stopWords;
private static final Locale locale = Locale.getDefault();
+ private TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ private OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) {
this.str = str;
@@ -389,8 +395,7 @@
this.stopWords = stopWords;
}
- public Token next(final Token reusableToken) {
- assert reusableToken != null;
+ public boolean incrementToken() {
// cache loop instance vars (performance)
String s = str;
int len = s.length();
@@ -430,9 +435,11 @@
pos = i;
if (text == null)
{
- return null;
+ return false;
}
- return reusableToken.reinit(text, start, i);
+ termAtt.setTermBuffer(text);
+ offsetAtt.setOffset(start, i);
+ return true;
}
private boolean isTokenChar(char c, boolean isLetter) {
Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/SynonymTokenFilter.java Sat Aug 1 22:52:32 2009
@@ -19,9 +19,12 @@
import java.io.IOException;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
/**
* Injects additional tokens for synonyms of token terms fetched from the
@@ -39,9 +42,13 @@
private String[] stack = null;
private int index = 0;
- private Token current = null;
+ private AttributeSource.State current = null;
private int todo = 0;
+ private TermAttribute termAtt;
+ private TypeAttribute typeAtt;
+ private PositionIncrementAttribute posIncrAtt;
+
/**
* Creates an instance for the given underlying stream and synonym table.
*
@@ -64,28 +71,29 @@
this.synonyms = synonyms;
this.maxSynonyms = maxSynonyms;
+
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ this.posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
}
/** Advances the stream to the next token; returns false at EOS. */
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ public final boolean incrementToken() throws IOException {
while (todo > 0 && index < stack.length) { // pop from stack
- Token nextToken = createToken(stack[index++], current, reusableToken);
- if (nextToken != null) {
+ if (createToken(stack[index++], current)) {
todo--;
- return nextToken;
+ return true;
}
}
- Token nextToken = input.next(reusableToken);
- if (nextToken == null) return null; // EOS; iterator exhausted
+ if (!input.incrementToken()) return false; // EOS; iterator exhausted
- stack = synonyms.getSynonyms(nextToken.term()); // push onto stack
+ stack = synonyms.getSynonyms(termAtt.term()); // push onto stack
if (stack.length > maxSynonyms) randomize(stack);
index = 0;
- current = (Token) nextToken.clone();
+ current = captureState();
todo = maxSynonyms;
- return nextToken;
+ return true;
}
/**
@@ -101,12 +109,12 @@
* @return true if a token was produced, or false to indicate that the given
* synonym should be ignored
*/
- protected Token createToken(String synonym, Token current, final Token reusableToken) {
- reusableToken.reinit(current, synonym);
- reusableToken.setTermBuffer(synonym);
- reusableToken.setType(SYNONYM_TOKEN_TYPE);
- reusableToken.setPositionIncrement(0);
- return reusableToken;
+ protected boolean createToken(String synonym, AttributeSource.State current) {
+ restoreState(current);
+ termAtt.setTermBuffer(synonym);
+ typeAtt.setType(SYNONYM_TOKEN_TYPE);
+ posIncrAtt.setPositionIncrement(0);
+ return true;
}
/**
Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java (original)
+++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java Sat Aug 1 22:52:32 2009
@@ -25,6 +25,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
@@ -105,20 +106,16 @@
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- final Token reusableToken = new Token();
- Token nextToken;
-
+ TermAttribute termAtt = (TermAttribute) source.addAttribute(TermAttribute.class);
+
int countTokens = 0;
while (true) {
try {
- nextToken = source.next(reusableToken);
+ if (!source.incrementToken()) break;
} catch (IOException e) {
- nextToken = null;
- }
- if (nextToken == null) {
break;
}
- String term = nextToken.term();
+ String term = termAtt.term();
if (!"".equals(term)) {
try {
tlist.set(countTokens++, term);
@@ -191,19 +188,15 @@
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
List tlist = new ArrayList();
- final Token reusableToken = new Token();
- Token nextToken;
-
+ TermAttribute termAtt = (TermAttribute) source.addAttribute(TermAttribute.class);
+
while (true) {
try {
- nextToken = source.next(reusableToken);
+ if (!source.incrementToken()) break;
} catch (IOException e) {
- nextToken = null;
- }
- if (nextToken == null) {
break;
}
- tlist.add(nextToken.term());
+ tlist.add(termAtt.term());
}
try {
@@ -241,13 +234,15 @@
throws ParseException {
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- final Token reusableToken = new Token();
- Token nextToken;
+ TermAttribute termAtt = (TermAttribute) source.addAttribute(TermAttribute.class);
+ String nextToken = null;
boolean multipleTokens = false;
-
+
try {
- nextToken = source.next(reusableToken);
- multipleTokens = source.next(reusableToken) != null;
+ if (source.incrementToken()) {
+ nextToken = termAtt.term();
+ }
+ multipleTokens = source.incrementToken();
} catch (IOException e) {
nextToken = null;
}
@@ -263,7 +258,7 @@
+ " - tokens were added");
}
- return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken.term(), minSimilarity);
+ return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken, minSimilarity);
}
/**
@@ -274,20 +269,17 @@
throws ParseException {
// get Analyzer from superclass and tokenize the terms
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1));
- final Token reusableToken = new Token();
- Token nextToken;
- Token multipleToken;
+ TermAttribute termAtt = (TermAttribute) source.addAttribute(TermAttribute.class);
boolean multipleTokens = false;
// part1
try {
- nextToken = source.next(reusableToken);
- if (nextToken != null) {
- part1 = nextToken.term();
+ if (source.incrementToken()) {
+ part1 = termAtt.term();
}
- multipleTokens = source.next(reusableToken) != null;
+ multipleTokens = source.incrementToken();
} catch (IOException e) {
- nextToken = null;
+ // ignore
}
try {
source.close();
@@ -301,14 +293,15 @@
// part2
source = getAnalyzer().tokenStream(field, new StringReader(part2));
+ termAtt = (TermAttribute) source.addAttribute(TermAttribute.class);
+
try {
- nextToken = source.next(reusableToken);
- if (nextToken != null) {
- part2 = nextToken.term();
+ if (source.incrementToken()) {
+ part2 = termAtt.term();
}
- multipleTokens = source.next(reusableToken) != null;
+ multipleTokens = source.incrementToken();
} catch (IOException e) {
- nextToken = null;
+ // ignore
}
try {
source.close();
Modified: lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/java/trunk/contrib/miscellaneous/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Sat Aug 1 22:52:32 2009
@@ -26,6 +26,8 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
@@ -57,28 +59,27 @@
boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+
+ public boolean incrementToken() throws IOException {
if (inPhrase) {
inPhrase = false;
- reusableToken.setTermBuffer("phrase2");
- reusableToken.setStartOffset(savedStart);
- reusableToken.setEndOffset(savedEnd);
- return reusableToken;
+ termAtt.setTermBuffer("phrase2");
+ offsetAtt.setOffset(savedStart, savedEnd);
+ return true;
} else
- for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) {
- if (nextToken.term().equals("phrase")) {
+ while(input.incrementToken())
+ if (termAtt.term().equals("phrase")) {
inPhrase = true;
- savedStart = nextToken.startOffset();
- savedEnd = nextToken.endOffset();
- nextToken.setTermBuffer("phrase1");
- nextToken.setStartOffset(savedStart);
- nextToken.setEndOffset(savedEnd);
- return nextToken;
- } else if (!nextToken.term().equals("stop"))
- return nextToken;
- }
- return null;
+ savedStart = offsetAtt.startOffset();
+ savedEnd = offsetAtt.endOffset();
+ termAtt.setTermBuffer("phrase1");
+ offsetAtt.setOffset(savedStart, savedEnd);
+ return true;
+ } else if (!termAtt.term().equals("stop"))
+ return true;
+ return false;
}
}
Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Sat Aug 1 22:52:32 2009
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
@@ -181,13 +182,14 @@
{
if(f.queryString==null) return;
TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
- final Token reusableToken = new Token();
+ TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
+
int corpusNumDocs=reader.numDocs();
Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
HashSet processedTerms=new HashSet();
- for (Token nextToken = ts.next(reusableToken); nextToken!=null; nextToken = ts.next(reusableToken))
+ while (ts.incrementToken())
{
- String term = nextToken.term();
+ String term = termAtt.term();
if(!processedTerms.contains(term))
{
processedTerms.add(term);
Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (original)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java Sat Aug 1 22:52:32 2009
@@ -28,9 +28,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import java.util.Set;
@@ -829,9 +829,10 @@
TokenStream ts = analyzer.tokenStream(fieldName, r);
int tokenCount=0;
// for every token
- final Token reusableToken = new Token();
- for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
- String word = nextToken.term();
+ TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
+
+ while (ts.incrementToken()) {
+ String word = termAtt.term();
tokenCount++;
if(tokenCount>maxNumTokensParsed)
{
Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java (original)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java Sat Aug 1 22:52:32 2009
@@ -21,8 +21,8 @@
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -86,11 +86,12 @@
throws IOException
{
TokenStream ts = a.tokenStream( field, new StringReader( body));
+ TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
+
BooleanQuery tmp = new BooleanQuery();
Set already = new HashSet(); // ignore dups
- final Token reusableToken = new Token();
- for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
- String word = nextToken.term();
+ while (ts.incrementToken()) {
+ String word = termAtt.term();
// ignore opt stop words
if ( stop != null &&
stop.contains( word)) continue;
Modified: lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original)
+++ lucene/java/trunk/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Sat Aug 1 22:52:32 2009
@@ -22,6 +22,7 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.tartarus.snowball.SnowballProgram;
/**
@@ -33,9 +34,12 @@
private SnowballProgram stemmer;
+ private TermAttribute termAtt;
+
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
super(input);
this.stemmer = stemmer;
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
@@ -56,21 +60,34 @@
} catch (Exception e) {
throw new RuntimeException(e.toString());
}
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/** Returns the next input Token, after being stemmed */
- public final Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
- Token nextToken = input.next(reusableToken);
- if (nextToken == null)
- return null;
- String originalTerm = nextToken.term();
- stemmer.setCurrent(originalTerm);
- stemmer.stem();
- String finalTerm = stemmer.getCurrent();
- // Don't bother updating, if it is unchanged.
- if (!originalTerm.equals(finalTerm))
- nextToken.setTermBuffer(finalTerm);
- return nextToken;
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String originalTerm = termAtt.term();
+ stemmer.setCurrent(originalTerm);
+ stemmer.stem();
+ String finalTerm = stemmer.getCurrent();
+ // Don't bother updating, if it is unchanged.
+ if (!originalTerm.equals(finalTerm))
+ termAtt.setTermBuffer(finalTerm);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
}
}
Modified: lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Sat Aug 1 22:52:32 2009
@@ -22,9 +22,14 @@
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
public class TestSnowball extends TestCase {
@@ -32,12 +37,12 @@
String input,
String[] output) throws Exception {
TokenStream ts = a.tokenStream("dummy", new StringReader(input));
- final Token reusableToken = new Token();
+ TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
for (int i = 0; i < output.length; i++) {
- Token nextToken = ts.next(reusableToken);
- assertEquals(output[i], nextToken.term());
+ assertTrue(ts.incrementToken());
+ assertEquals(output[i], termAtt.term());
}
- assertNull(ts.next(reusableToken));
+ assertFalse(ts.incrementToken());
ts.close();
}
@@ -49,33 +54,51 @@
public void testFilterTokens() throws Exception {
- final Token tok = new Token(2, 7, "wrd");
- tok.setTermBuffer("accents");
- tok.setPositionIncrement(3);
- Payload tokPayload = new Payload(new byte[]{0,1,2,3});
- tok.setPayload(tokPayload);
- int tokFlags = 77;
- tok.setFlags(tokFlags);
-
- SnowballFilter filter = new SnowballFilter(
- new TokenStream() {
- public Token next(final Token reusableToken) {
- assert reusableToken != null;
- return tok;
- }
- },
- "English"
- );
-
- final Token reusableToken = new Token();
- Token nextToken = filter.next(reusableToken);
-
- assertEquals("accent", nextToken.term());
- assertEquals(2, nextToken.startOffset());
- assertEquals(7, nextToken.endOffset());
- assertEquals("wrd", nextToken.type());
- assertEquals(3, nextToken.getPositionIncrement());
- assertEquals(tokFlags, nextToken.getFlags());
- assertEquals(tokPayload, nextToken.getPayload());
+ SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
+ TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+ OffsetAttribute offsetAtt = (OffsetAttribute) filter.getAttribute(OffsetAttribute.class);
+ TypeAttribute typeAtt = (TypeAttribute) filter.getAttribute(TypeAttribute.class);
+ PayloadAttribute payloadAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+ PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) filter.getAttribute(PositionIncrementAttribute.class);
+ FlagsAttribute flagsAtt = (FlagsAttribute) filter.getAttribute(FlagsAttribute.class);
+
+ filter.incrementToken();
+
+ assertEquals("accent", termAtt.term());
+ assertEquals(2, offsetAtt.startOffset());
+ assertEquals(7, offsetAtt.endOffset());
+ assertEquals("wrd", typeAtt.type());
+ assertEquals(3, posIncAtt.getPositionIncrement());
+ assertEquals(77, flagsAtt.getFlags());
+ assertEquals(new Payload(new byte[]{0,1,2,3}), payloadAtt.getPayload());
+ }
+
+ private final class TestTokenStream extends TokenStream {
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
+ private TypeAttribute typeAtt;
+ private PayloadAttribute payloadAtt;
+ private PositionIncrementAttribute posIncAtt;
+ private FlagsAttribute flagsAtt;
+
+ TestTokenStream() {
+ super();
+ termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+ posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
+ }
+
+ public boolean incrementToken() {
+ termAtt.setTermBuffer("accents");
+ offsetAtt.setOffset(2, 7);
+ typeAtt.setType("wrd");
+ posIncAtt.setPositionIncrement(3);
+ payloadAtt.setPayload(new Payload(new byte[]{0,1,2,3}));
+ flagsAtt.setFlags(77);
+ return true;
+ }
}
}
\ No newline at end of file
Modified: lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (original)
+++ lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java Sat Aug 1 22:52:32 2009
@@ -20,6 +20,12 @@
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
import java.io.Reader;
@@ -114,6 +120,12 @@
private int tokenOutput = TOKENS_ONLY;
private Set untokenizedTypes = Collections.EMPTY_SET;
private Iterator tokens = null;
+
+ private OffsetAttribute offsetAtt;
+ private TypeAttribute typeAtt;
+ private PositionIncrementAttribute posIncrAtt;
+ private TermAttribute termAtt;
+ private FlagsAttribute flagsAtt;
void setInput(Reader reader) {
this.input = CharReader.get(reader);
@@ -142,41 +154,59 @@
this.tokenOutput = tokenOutput;
this.scanner = new WikipediaTokenizerImpl(input);
this.untokenizedTypes = untokenizedTypes;
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ this.typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+ this.posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
}
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next(final Token reusableToken) throws java.io.IOException {
+ return super.next(reusableToken);
+ }
+
+ /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+ * not be overridden. Delegates to the backwards compatibility layer. */
+ public final Token next() throws java.io.IOException {
+ return super.next();
+ }
+
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
- public Token next(final Token reusableToken) throws IOException {
- assert reusableToken != null;
+ public final boolean incrementToken() throws IOException {
if (tokens != null && tokens.hasNext()){
- return (Token)tokens.next();
+ AttributeSource.State state = (AttributeSource.State) tokens.next();
+ restoreState(state);
+ return true;
}
int tokenType = scanner.getNextToken();
if (tokenType == WikipediaTokenizerImpl.YYEOF) {
- return null;
+ return false;
}
String type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
if (tokenOutput == TOKENS_ONLY || untokenizedTypes.contains(type) == false){
- setupToken(reusableToken);
+ setupToken();
} else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.contains(type) == true){
- collapseTokens(reusableToken, tokenType);
+ collapseTokens(tokenType);
}
else if (tokenOutput == BOTH){
//collapse into a single token, add it to tokens AND output the individual tokens
//output the untokenized Token first
- collapseAndSaveTokens(reusableToken, tokenType, type);
+ collapseAndSaveTokens(tokenType, type);
}
- reusableToken.setPositionIncrement(scanner.getPositionIncrement());
- reusableToken.setType(type);
- return reusableToken;
+ posIncrAtt.setPositionIncrement(scanner.getPositionIncrement());
+ typeAtt.setType(type);
+ return true;
}
- private void collapseAndSaveTokens(final Token reusableToken, int tokenType, String type) throws IOException {
+ private void collapseAndSaveTokens(int tokenType, String type) throws IOException {
//collapse
StringBuffer buffer = new StringBuffer(32);
int numAdded = scanner.setText(buffer);
@@ -186,9 +216,8 @@
int tmpTokType;
int numSeen = 0;
List tmp = new ArrayList();
- Token saved = new Token();
- setupSavedToken(saved, 0, type);
- tmp.add(saved);
+ setupSavedToken(0, type);
+ tmp.add(captureState());
//while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
while ((tmpTokType = scanner.getNextToken()) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.getNumWikiTokensSeen() > numSeen){
int currPos = scanner.yychar();
@@ -197,18 +226,16 @@
buffer.append(' ');
}
numAdded = scanner.setText(buffer);
- saved = new Token();
- setupSavedToken(saved, scanner.getPositionIncrement(), type);
- tmp.add(saved);
+ setupSavedToken(scanner.getPositionIncrement(), type);
+ tmp.add(captureState());
numSeen++;
lastPos = currPos + numAdded;
}
//trim the buffer
String s = buffer.toString().trim();
- reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
- reusableToken.setStartOffset(input.correctOffset(theStart));
- reusableToken.setEndOffset(input.correctOffset(theStart + s.length()));
- reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG);
+ termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
+ offsetAtt.setOffset(input.correctOffset(theStart), input.correctOffset(theStart + s.length()));
+ flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
scanner.yypushback(scanner.yylength());
@@ -216,13 +243,13 @@
tokens = tmp.iterator();
}
- private void setupSavedToken(Token saved, int positionInc, String type){
- setupToken(saved);
- saved.setPositionIncrement(positionInc);
- saved.setType(type);
+ private void setupSavedToken(int positionInc, String type){
+ setupToken();
+ posIncrAtt.setPositionIncrement(positionInc);
+ typeAtt.setType(type);
}
- private void collapseTokens(final Token reusableToken, int tokenType) throws IOException {
+ private void collapseTokens(int tokenType) throws IOException {
//collapse
StringBuffer buffer = new StringBuffer(32);
int numAdded = scanner.setText(buffer);
@@ -244,10 +271,9 @@
}
//trim the buffer
String s = buffer.toString().trim();
- reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
- reusableToken.setStartOffset(input.correctOffset(theStart));
- reusableToken.setEndOffset(input.correctOffset(theStart + s.length()));
- reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG);
+ termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
+ offsetAtt.setOffset(input.correctOffset(theStart), input.correctOffset(theStart + s.length()));
+ flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
scanner.yypushback(scanner.yylength());
@@ -256,11 +282,10 @@
}
}
- private void setupToken(final Token reusableToken) {
- scanner.getText(reusableToken);
+ private void setupToken() {
+ scanner.getText(termAtt);
final int start = scanner.yychar();
- reusableToken.setStartOffset(input.correctOffset(start));
- reusableToken.setEndOffset(input.correctOffset(start + reusableToken.termLength()));
+ offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start + termAtt.termLength()));
}
/*
Modified: lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java?rev=799953&r1=799952&r2=799953&view=diff
==============================================================================
--- lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java (original)
+++ lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java Sat Aug 1 22:52:32 2009
@@ -19,7 +19,7 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
@@ -476,7 +476,7 @@
/**
* Fills Lucene token with the current token text.
*/
-final void getText(Token t) {
+final void getText(TermAttribute t) {
t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}