You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/06/01 13:46:58 UTC
svn commit: r950026 [2/5] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/search/
lucene/backwards/src/test/org/apache/lucene/analysis/
lucene/backwards/src/test/org/apache/lucene/docume...
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java Tue Jun 1 11:46:54 2010
@@ -21,14 +21,14 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link IndicNormalizer} to normalize text
* in Indian Languages.
*/
public final class IndicNormalizationFilter extends TokenFilter {
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final IndicNormalizer normalizer = new IndicNormalizer();
public IndicNormalizationFilter(TokenStream input) {
@@ -38,7 +38,7 @@ public final class IndicNormalizationFil
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), termAtt.termLength()));
+ termAtt.setLength(normalizer.normalize(termAtt.buffer(), termAtt.length()));
return true;
} else {
return false;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java Tue Jun 1 11:46:54 2010
@@ -31,8 +31,8 @@ import org.apache.lucene.analysis.CharAr
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
@@ -330,8 +330,8 @@ public final class PatternAnalyzer exten
private Matcher matcher;
private int pos = 0;
private static final Locale locale = Locale.getDefault();
- private TermAttribute termAtt = addAttribute(TermAttribute.class);
- private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) {
this.str = str;
@@ -358,7 +358,7 @@ public final class PatternAnalyzer exten
if (start != end) { // non-empty match (header/trailer)
String text = str.substring(start, end);
if (toLowerCase) text = text.toLowerCase(locale);
- termAtt.setTermBuffer(text);
+ termAtt.setEmpty().append(text);
offsetAtt.setOffset(start, end);
return true;
}
@@ -390,8 +390,8 @@ public final class PatternAnalyzer exten
private final boolean toLowerCase;
private final Set<?> stopWords;
private static final Locale locale = Locale.getDefault();
- private TermAttribute termAtt = addAttribute(TermAttribute.class);
- private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set<?> stopWords) {
this.str = str;
@@ -444,7 +444,7 @@ public final class PatternAnalyzer exten
{
return false;
}
- termAtt.setTermBuffer(text);
+ termAtt.setEmpty().append(text);
offsetAtt.setOffset(start, i);
return true;
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
@@ -44,14 +44,14 @@ public class PrefixAwareTokenFilter exte
private TokenStream prefix;
private TokenStream suffix;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private PayloadAttribute payloadAtt;
private OffsetAttribute offsetAtt;
private TypeAttribute typeAtt;
private FlagsAttribute flagsAtt;
- private TermAttribute p_termAtt;
+ private CharTermAttribute p_termAtt;
private PositionIncrementAttribute p_posIncrAtt;
private PayloadAttribute p_payloadAtt;
private OffsetAttribute p_offsetAtt;
@@ -64,14 +64,14 @@ public class PrefixAwareTokenFilter exte
this.prefix = prefix;
prefixExhausted = false;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
flagsAtt = addAttribute(FlagsAttribute.class);
- p_termAtt = prefix.addAttribute(TermAttribute.class);
+ p_termAtt = prefix.addAttribute(CharTermAttribute.class);
p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
@@ -115,7 +115,7 @@ public class PrefixAwareTokenFilter exte
private void setCurrentToken(Token token) {
if (token == null) return;
clearAttributes();
- termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -125,7 +125,7 @@ public class PrefixAwareTokenFilter exte
private Token getNextPrefixInputToken(Token token) throws IOException {
if (!prefix.incrementToken()) return null;
- token.setTermBuffer(p_termAtt.termBuffer(), 0, p_termAtt.termLength());
+ token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
token.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
token.setFlags(p_flagsAtt.getFlags());
token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
@@ -136,7 +136,7 @@ public class PrefixAwareTokenFilter exte
private Token getNextSuffixInputToken(Token token) throws IOException {
if (!suffix.incrementToken()) return null;
- token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setPositionIncrement(posIncrAtt.getPositionIncrement());
token.setFlags(flagsAtt.getFlags());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java Tue Jun 1 11:46:54 2010
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenStream} containing a single token.
@@ -41,7 +41,7 @@ public final class SingleTokenTokenStrea
assert token != null;
this.singleToken = (Token) token.clone();
- tokenAtt = (AttributeImpl) addAttribute(TermAttribute.class);
+ tokenAtt = (AttributeImpl) addAttribute(CharTermAttribute.class);
assert (tokenAtt instanceof Token);
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java Tue Jun 1 11:46:54 2010
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.CharAr
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
/**
@@ -34,7 +34,7 @@ import org.apache.lucene.util.Version;
public final class StemmerOverrideFilter extends TokenFilter {
private final CharArrayMap<String> dictionary;
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
/**
@@ -56,9 +56,9 @@ public final class StemmerOverrideFilter
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
- String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength());
+ String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length());
if (stem != null) {
- termAtt.setTermBuffer(stem);
+ termAtt.setEmpty().append(stem);
keywordAtt.setKeyword(true);
}
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ngram
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
@@ -72,8 +72,8 @@ public final class EdgeNGramTokenFilter
private int curGramSize;
private int tokStart;
- private final TermAttribute termAtt;
- private final OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -101,8 +101,6 @@ public final class EdgeNGramTokenFilter
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
/**
@@ -124,8 +122,8 @@ public final class EdgeNGramTokenFilter
if (!input.incrementToken()) {
return false;
} else {
- curTermBuffer = termAtt.termBuffer().clone();
- curTermLength = termAtt.termLength();
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
curGramSize = minGram;
tokStart = offsetAtt.startOffset();
}
@@ -138,7 +136,7 @@ public final class EdgeNGramTokenFilter
int end = start + curGramSize;
clearAttributes();
offsetAtt.setOffset(tokStart + start, tokStart + end);
- termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, start, curGramSize);
curGramSize++;
return true;
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Tue Jun 1 11:46:54 2010
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram
*/
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
@@ -37,8 +37,8 @@ public final class EdgeNGramTokenizer ex
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/** Specifies which side of the input the n-gram should be generated from */
public static enum Side {
@@ -173,10 +173,6 @@ public final class EdgeNGramTokenizer ex
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
-
}
/** Returns the next token in the stream, or null at EOS. */
@@ -206,7 +202,7 @@ public final class EdgeNGramTokenizer ex
// grab gramSize chars from front or back
int start = side == Side.FRONT ? 0 : inLen - gramSize;
int end = start + gramSize;
- termAtt.setTermBuffer(inStr, start, gramSize);
+ termAtt.setEmpty().append(inStr, start, end);
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
gramSize++;
return true;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Tokenizes the input into n-grams of the given size(s).
@@ -39,8 +39,8 @@ public final class NGramTokenFilter exte
private int curPos;
private int tokStart;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates NGramTokenFilter with given min and max n-grams.
@@ -58,9 +58,6 @@ public final class NGramTokenFilter exte
}
this.minGram = minGram;
this.maxGram = maxGram;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
/**
@@ -79,8 +76,8 @@ public final class NGramTokenFilter exte
if (!input.incrementToken()) {
return false;
} else {
- curTermBuffer = termAtt.termBuffer().clone();
- curTermLength = termAtt.termLength();
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
curGramSize = minGram;
curPos = 0;
tokStart = offsetAtt.startOffset();
@@ -89,7 +86,7 @@ public final class NGramTokenFilter exte
while (curGramSize <= maxGram) {
while (curPos+curGramSize <= curTermLength) { // while there is input
clearAttributes();
- termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
curPos++;
return true;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Tue Jun 1 11:46:54 2010
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram
*/
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
@@ -39,8 +39,8 @@ public final class NGramTokenizer extend
private String inStr;
private boolean started = false;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates NGramTokenizer with given min and max n-grams.
@@ -94,9 +94,6 @@ public final class NGramTokenizer extend
}
this.minGram = minGram;
this.maxGram = maxGram;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
/** Returns the next token in the stream, or null at EOS. */
@@ -123,7 +120,7 @@ public final class NGramTokenizer extend
int oldPos = pos;
pos++;
- termAtt.setTermBuffer(inStr, oldPos, gramSize);
+ termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize);
offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize));
return true;
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java Tue Jun 1 11:46:54 2010
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenF
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that stems Dutch words.
@@ -52,17 +52,14 @@ public final class DutchStemFilter exten
/**
* The actual token in the input stream.
*/
- private DutchStemmer stemmer = null;
+ private DutchStemmer stemmer = new DutchStemmer();
private Set<?> exclusions = null;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public DutchStemFilter(TokenStream _in) {
super(_in);
- stemmer = new DutchStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
/**
@@ -99,14 +96,14 @@ public final class DutchStemFilter exten
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final String term = termAtt.term();
+ final String term = termAtt.toString();
// Check the exclusion table.
if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
final String s = stemmer.stem(term);
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals(term))
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
@@ -39,15 +39,13 @@ import org.apache.lucene.analysis.tokena
public final class DelimitedPayloadTokenFilter extends TokenFilter {
public static final char DEFAULT_DELIMITER = '|';
private final char delimiter;
- private final TermAttribute termAtt;
- private final PayloadAttribute payAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
private final PayloadEncoder encoder;
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
super(input);
- termAtt = addAttribute(TermAttribute.class);
- payAtt = addAttribute(PayloadAttribute.class);
this.delimiter = delimiter;
this.encoder = encoder;
}
@@ -55,12 +53,12 @@ public final class DelimitedPayloadToken
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ final int length = termAtt.length();
for (int i = 0; i < length; i++) {
if (buffer[i] == delimiter) {
payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
- termAtt.setTermLength(i); // simply set a new length
+ termAtt.setLength(i); // simply set a new length
return true;
}
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -35,16 +35,14 @@ public class NumericPayloadTokenFilter e
private String typeMatch;
private Payload thePayload;
- private PayloadAttribute payloadAtt;
- private TypeAttribute typeAtt;
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
//Need to encode the payload
thePayload = new Payload(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
- payloadAtt = addAttribute(PayloadAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -33,13 +33,11 @@ import org.apache.lucene.index.Payload;
*
**/
public class TokenOffsetPayloadTokenFilter extends TokenFilter {
- protected OffsetAttribute offsetAtt;
- protected PayloadAttribute payAtt;
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
public TokenOffsetPayloadTokenFilter(TokenStream input) {
super(input);
- offsetAtt = addAttribute(OffsetAttribute.class);
- payAtt = addAttribute(PayloadAttribute.class);
}
@Override
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -33,13 +33,11 @@ import java.io.IOException;
*
**/
public class TypeAsPayloadTokenFilter extends TokenFilter {
- private PayloadAttribute payloadAtt;
- private TypeAttribute typeAtt;
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public TypeAsPayloadTokenFilter(TokenStream input) {
super(input);
- payloadAtt = addAttribute(PayloadAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java Tue Jun 1 11:46:54 2010
@@ -35,7 +35,7 @@ public final class PositionFilter extend
/** The first token must have non-zero positionIncrement **/
private boolean firstTokenPositioned = false;
- private PositionIncrementAttribute posIncrAtt;
+ private PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Constructs a PositionFilter that assigns a position increment of zero to
@@ -45,7 +45,6 @@ public final class PositionFilter extend
*/
public PositionFilter(final TokenStream input) {
super(input);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
/**
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java Tue Jun 1 11:46:54 2010
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.rever
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -42,7 +42,7 @@ import java.io.IOException;
*/
public final class ReverseStringFilter extends TokenFilter {
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final char marker;
private final Version matchVersion;
private static final char NOMARKER = '\uFFFF';
@@ -131,20 +131,19 @@ public final class ReverseStringFilter e
super(in);
this.matchVersion = matchVersion;
this.marker = marker;
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- int len = termAtt.termLength();
+ int len = termAtt.length();
if (marker != NOMARKER) {
len++;
- termAtt.resizeTermBuffer(len);
- termAtt.termBuffer()[len - 1] = marker;
+ termAtt.resizeBuffer(len);
+ termAtt.buffer()[len - 1] = marker;
}
- reverse( matchVersion, termAtt.termBuffer(), 0, len );
- termAtt.setTermLength(len);
+ reverse( matchVersion, termAtt.buffer(), 0, len );
+ termAtt.setLength(len);
return true;
} else {
return false;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java Tue Jun 1 11:46:54 2010
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Normalizes token text to lower case.
@@ -32,20 +32,19 @@ import org.apache.lucene.analysis.tokena
@Deprecated
public final class RussianLowerCaseFilter extends TokenFilter
{
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public RussianLowerCaseFilter(TokenStream in)
{
super(in);
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public final boolean incrementToken() throws IOException
{
if (input.incrementToken()) {
- char[] chArray = termAtt.termBuffer();
- int chLen = termAtt.termLength();
+ char[] chArray = termAtt.buffer();
+ int chLen = termAtt.length();
for (int i = 0; i < chLen; i++)
{
chArray[i] = Character.toLowerCase(chArray[i]);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java Tue Jun 1 11:46:54 2010
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.LowerC
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link
@@ -51,17 +51,14 @@ public final class RussianStemFilter ext
/**
* The actual token in the input stream.
*/
- private RussianStemmer stemmer = null;
+ private RussianStemmer stemmer = new RussianStemmer();
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public RussianStemFilter(TokenStream in)
{
super(in);
- stemmer = new RussianStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
/**
* Returns the next token in the stream, or null at EOS
@@ -71,10 +68,10 @@ public final class RussianStemFilter ext
{
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
- final String term = termAtt.term();
+ final String term = termAtt.toString();
final String s = stemmer.stem(term);
if (s != null && !s.equals(term))
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java Tue Jun 1 11:46:54 2010
@@ -137,10 +137,10 @@ public final class ShingleFilter extends
*/
private boolean isOutputHere = false;
- private final CharTermAttribute termAtt;
- private final OffsetAttribute offsetAtt;
- private final PositionIncrementAttribute posIncrAtt;
- private final TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
/**
@@ -155,10 +155,6 @@ public final class ShingleFilter extends
super(input);
setMaxShingleSize(maxShingleSize);
setMinShingleSize(minShingleSize);
- this.termAtt = addAttribute(CharTermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
- this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- this.typeAtt = addAttribute(TypeAttribute.class);
}
/**
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java Tue Jun 1 11:46:54 2010
@@ -31,11 +31,11 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column.Row;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
@@ -193,14 +193,14 @@ public final class ShingleMatrixFilter e
private TokenStream input;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private PayloadAttribute payloadAtt;
private OffsetAttribute offsetAtt;
private TypeAttribute typeAtt;
private FlagsAttribute flagsAtt;
- private TermAttribute in_termAtt;
+ private CharTermAttribute in_termAtt;
private PositionIncrementAttribute in_posIncrAtt;
private PayloadAttribute in_payloadAtt;
private OffsetAttribute in_offsetAtt;
@@ -229,7 +229,7 @@ public final class ShingleMatrixFilter e
this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
this.settingsCodec = settingsCodec;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
@@ -239,7 +239,7 @@ public final class ShingleMatrixFilter e
// set the input to be an empty token stream, we already have the data.
this.input = new EmptyTokenStream();
- in_termAtt = input.addAttribute(TermAttribute.class);
+ in_termAtt = input.addAttribute(CharTermAttribute.class);
in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
in_payloadAtt = input.addAttribute(PayloadAttribute.class);
in_offsetAtt = input.addAttribute(OffsetAttribute.class);
@@ -311,14 +311,14 @@ public final class ShingleMatrixFilter e
this.spacerCharacter = spacerCharacter;
this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
this.settingsCodec = settingsCodec;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
flagsAtt = addAttribute(FlagsAttribute.class);
- in_termAtt = input.addAttribute(TermAttribute.class);
+ in_termAtt = input.addAttribute(CharTermAttribute.class);
in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
in_payloadAtt = input.addAttribute(PayloadAttribute.class);
in_offsetAtt = input.addAttribute(OffsetAttribute.class);
@@ -377,7 +377,7 @@ public final class ShingleMatrixFilter e
if (token == null) return false;
clearAttributes();
- termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -388,7 +388,7 @@ public final class ShingleMatrixFilter e
private Token getNextInputToken(Token token) throws IOException {
if (!input.incrementToken()) return null;
- token.setTermBuffer(in_termAtt.termBuffer(), 0, in_termAtt.termLength());
+ token.copyBuffer(in_termAtt.buffer(), 0, in_termAtt.length());
token.setPositionIncrement(in_posIncrAtt.getPositionIncrement());
token.setFlags(in_flagsAtt.getFlags());
token.setOffset(in_offsetAtt.startOffset(), in_offsetAtt.endOffset());
@@ -399,7 +399,7 @@ public final class ShingleMatrixFilter e
private Token getNextToken(Token token) throws IOException {
if (!this.incrementToken()) return null;
- token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setPositionIncrement(posIncrAtt.getPositionIncrement());
token.setFlags(flagsAtt.getFlags());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
@@ -441,7 +441,7 @@ public final class ShingleMatrixFilter e
for (int i = 0; i < currentShingleLength; i++) {
Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
- termLength += shingleToken.termLength();
+ termLength += shingleToken.length();
shingle.add(shingleToken);
}
if (spacerCharacter != null) {
@@ -459,9 +459,9 @@ public final class ShingleMatrixFilter e
if (spacerCharacter != null && sb.length() > 0) {
sb.append(spacerCharacter);
}
- sb.append(shingleToken.termBuffer(), 0, shingleToken.termLength());
+ sb.append(shingleToken.buffer(), 0, shingleToken.length());
}
- reusableToken.setTermBuffer(sb.toString());
+ reusableToken.setEmpty().append(sb);
updateToken(reusableToken, shingle, currentPermutationTokensStartOffset, currentPermutationRows, currentPermuationTokens);
return reusableToken;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java Tue Jun 1 11:46:54 2010
@@ -22,7 +22,7 @@ import java.text.ParseException;
import java.util.Date;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
/**
@@ -35,7 +35,7 @@ public class DateRecognizerSinkFilter ex
public static final String DATE_TYPE = "date";
protected DateFormat dateFormat;
- protected TermAttribute termAtt;
+ protected CharTermAttribute termAtt;
/**
* Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
@@ -51,10 +51,10 @@ public class DateRecognizerSinkFilter ex
@Override
public boolean accept(AttributeSource source) {
if (termAtt == null) {
- termAtt = source.addAttribute(TermAttribute.class);
+ termAtt = source.addAttribute(CharTermAttribute.class);
}
try {
- Date date = dateFormat.parse(termAtt.term());//We don't care about the date, just that we can parse it as a date
+ Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date
if (date != null) {
return true;
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Tue Jun 1 11:46:54 2010
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.apache.lucene.analysis.LowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
@@ -42,7 +42,7 @@ public final class SnowballFilter extend
private final SnowballProgram stemmer;
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
@@ -75,16 +75,16 @@ public final class SnowballFilter extend
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
- char termBuffer[] = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ char termBuffer[] = termAtt.buffer();
+ final int length = termAtt.length();
stemmer.setCurrent(termBuffer, length);
stemmer.stem();
final char finalTerm[] = stemmer.getCurrentBuffer();
final int newLength = stemmer.getCurrentBufferLength();
if (finalTerm != termBuffer)
- termAtt.setTermBuffer(finalTerm, 0, newLength);
+ termAtt.copyBuffer(finalTerm, 0, newLength);
else
- termAtt.setTermLength(newLength);
+ termAtt.setLength(newLength);
}
return true;
} else {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java Tue Jun 1 11:46:54 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Normalizes Turkish token text to lower case.
@@ -37,7 +37,7 @@ public final class TurkishLowerCaseFilte
private static final int LATIN_SMALL_LETTER_I = '\u0069';
private static final int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
private static final int COMBINING_DOT_ABOVE = '\u0307';
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new TurkishLowerCaseFilter, that normalizes Turkish token text
@@ -47,7 +47,6 @@ public final class TurkishLowerCaseFilte
*/
public TurkishLowerCaseFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
}
@Override
@@ -55,8 +54,8 @@ public final class TurkishLowerCaseFilte
boolean iOrAfter = false;
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ int length = termAtt.length();
for (int i = 0; i < length;) {
final int ch = Character.codePointAt(buffer, i);
@@ -88,7 +87,7 @@ public final class TurkishLowerCaseFilte
i += Character.toChars(Character.toLowerCase(ch), buffer, i);
}
- termAtt.setTermLength(length);
+ termAtt.setLength(length);
return true;
} else
return false;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Tue Jun 1 11:46:54 2010
@@ -18,10 +18,10 @@
package org.apache.lucene.analysis.wikipedia;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -116,11 +116,11 @@ public final class WikipediaTokenizer ex
private Set<String> untokenizedTypes = Collections.emptySet();
private Iterator<AttributeSource.State> tokens = null;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
- private PositionIncrementAttribute posIncrAtt;
- private TermAttribute termAtt;
- private FlagsAttribute flagsAtt;
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
/**
* Creates a new instance of the {@link WikipediaTokenizer}. Attaches the
@@ -176,12 +176,7 @@ public final class WikipediaTokenizer ex
private void init(int tokenOutput, Set<String> untokenizedTypes) {
this.tokenOutput = tokenOutput;
- this.untokenizedTypes = untokenizedTypes;
- this.offsetAtt = addAttribute(OffsetAttribute.class);
- this.typeAtt = addAttribute(TypeAttribute.class);
- this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- this.termAtt = addAttribute(TermAttribute.class);
- this.flagsAtt = addAttribute(FlagsAttribute.class);
+ this.untokenizedTypes = untokenizedTypes;
}
/*
@@ -245,8 +240,9 @@ public final class WikipediaTokenizer ex
lastPos = currPos + numAdded;
}
//trim the buffer
+ // TODO: this is inefficient
String s = buffer.toString().trim();
- termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
+ termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
@@ -283,8 +279,9 @@ public final class WikipediaTokenizer ex
lastPos = currPos + numAdded;
}
//trim the buffer
+ // TODO: this is inefficient
String s = buffer.toString().trim();
- termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
+ termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
@@ -298,7 +295,7 @@ public final class WikipediaTokenizer ex
private void setupToken() {
scanner.getText(termAtt);
final int start = scanner.yychar();
- offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.termLength()));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
}
/*
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Tue Jun 1 11:46:54 2010
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:43 */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 6/1/10 7:00 AM */
package org.apache.lucene.analysis.wikipedia;
@@ -19,14 +19,14 @@ package org.apache.lucene.analysis.wikip
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 17.05.10 14:43 from the specification file
- * <tt>C:/Users/Uwe Schindler/Projects/lucene/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 6/1/10 7:00 AM from the specification file
+ * <tt>C:/Users/rmuir/workspace/lucene_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@@ -37,16 +37,16 @@ class WikipediaTokenizerImpl {
private static final int ZZ_BUFFERSIZE = 16384;
/** lexical states */
- public static final int CATEGORY_STATE = 2;
- public static final int DOUBLE_EQUALS_STATE = 14;
+ public static final int THREE_SINGLE_QUOTES_STATE = 10;
public static final int EXTERNAL_LINK_STATE = 6;
+ public static final int DOUBLE_EQUALS_STATE = 14;
public static final int INTERNAL_LINK_STATE = 4;
public static final int DOUBLE_BRACE_STATE = 16;
- public static final int FIVE_SINGLE_QUOTES_STATE = 12;
+ public static final int CATEGORY_STATE = 2;
+ public static final int YYINITIAL = 0;
public static final int STRING = 18;
+ public static final int FIVE_SINGLE_QUOTES_STATE = 12;
public static final int TWO_SINGLE_QUOTES_STATE = 8;
- public static final int YYINITIAL = 0;
- public static final int THREE_SINGLE_QUOTES_STATE = 10;
/**
* ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
@@ -487,8 +487,8 @@ public final int getPositionIncrement(){
/**
* Fills Lucene token with the current token text.
*/
-final void getText(TermAttribute t) {
- t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+final void getText(CharTermAttribute t) {
+ t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
final int setText(StringBuilder buffer){
@@ -803,184 +803,184 @@ final int setText(StringBuilder buffer){
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 25:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);
+ case 16:
+ { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
}
case 46: break;
- case 30:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/
+ case 39:
+ { positionInc = 1; return ACRONYM;
}
case 47: break;
- case 41:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/
+ case 8:
+ { /* ignore */
}
case 48: break;
- case 14:
- { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+ case 20:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE);
}
case 49: break;
- case 23:
- { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);
+ case 35:
+ { positionInc = 1; return COMPANY;
}
case 50: break;
- case 34:
- { positionInc = 1; return NUM;
+ case 4:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
}
case 51: break;
- case 18:
- { /* ignore STRING */
+ case 25:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);
}
case 52: break;
- case 12:
- { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
+ case 43:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);
}
case 53: break;
- case 37:
- { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/
+ case 22:
+ { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}
}
case 54: break;
- case 31:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE);
+ case 34:
+ { positionInc = 1; return NUM;
}
case 55: break;
- case 10:
- { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);
+ case 32:
+ { positionInc = 1; return APOSTROPHE;
}
case 56: break;
- case 38:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/
+ case 23:
+ { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);
}
case 57: break;
- case 19:
- { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+ case 21:
+ { yybegin(STRING); return currentTokType;/*pipe*/
}
case 58: break;
- case 11:
- { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);
+ case 2:
+ { positionInc = 1; return ALPHANUM;
}
case 59: break;
- case 1:
- { numWikiTokensSeen = 0; positionInc = 1;
+ case 29:
+ { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
}
case 60: break;
- case 33:
- { positionInc = 1; return HOST;
+ case 17:
+ { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
}
case 61: break;
- case 3:
- { positionInc = 1; return CJ;
+ case 44:
+ { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);
}
case 62: break;
- case 17:
- { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
+ case 26:
+ { yybegin(YYINITIAL);
}
case 63: break;
- case 32:
- { positionInc = 1; return APOSTROPHE;
+ case 3:
+ { positionInc = 1; return CJ;
}
case 64: break;
- case 8:
- { /* ignore */
+ case 38:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/
}
case 65: break;
- case 4:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
+ case 15:
+ { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING);
}
case 66: break;
- case 2:
- { positionInc = 1; return ALPHANUM;
+ case 30:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/
}
case 67: break;
- case 26:
- { yybegin(YYINITIAL);
+ case 6:
+ { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
}
case 68: break;
- case 43:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);
+ case 5:
+ { positionInc = 1;
}
case 69: break;
- case 36:
- { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE);
+ case 19:
+ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
}
case 70: break;
- case 13:
- { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);
+ case 42:
+ { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
}
case 71: break;
- case 24:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);
- }
- case 72: break;
case 27:
{ numLinkToks = 0; yybegin(YYINITIAL);
}
+ case 72: break;
+ case 11:
+ { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);
+ }
case 73: break;
- case 15:
- { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING);
+ case 13:
+ { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);
}
case 74: break;
- case 28:
- { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
+ case 14:
+ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
}
case 75: break;
- case 39:
- { positionInc = 1; return ACRONYM;
+ case 45:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE);
}
case 76: break;
- case 29:
- { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
+ case 28:
+ { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
}
case 77: break;
- case 7:
- { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+ case 37:
+ { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/
}
case 78: break;
- case 16:
- { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
+ case 9:
+ { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
}
case 79: break;
- case 20:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE);
+ case 7:
+ { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
}
case 80: break;
- case 35:
- { positionInc = 1; return COMPANY;
+ case 24:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);
}
case 81: break;
case 40:
{ positionInc = 1; return EMAIL;
}
case 82: break;
- case 42:
- { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+ case 1:
+ { numWikiTokensSeen = 0; positionInc = 1;
}
case 83: break;
- case 6:
- { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+ case 18:
+ { /* ignore STRING */
}
case 84: break;
- case 44:
- { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);
+ case 36:
+ { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE);
}
case 85: break;
- case 5:
- { positionInc = 1;
+ case 33:
+ { positionInc = 1; return HOST;
}
case 86: break;
- case 9:
- { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+ case 31:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE);
}
case 87: break;
- case 45:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE);
+ case 41:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/
}
case 88: break;
- case 22:
- { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}
+ case 12:
+ { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
}
case 89: break;
- case 21:
- { yybegin(STRING); return currentTokType;/*pipe*/
+ case 10:
+ { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);
}
case 90: break;
default:
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex Tue Jun 1 11:46:54 2010
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.wikip
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%%
@@ -81,8 +81,8 @@ public final int getPositionIncrement(){
/**
* Fills Lucene token with the current token text.
*/
-final void getText(TermAttribute t) {
- t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+final void getText(CharTermAttribute t) {
+ t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
final int setText(StringBuilder buffer){
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.compo
* limitations under the License.
*/
-import java.io.File;
-import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
@@ -27,7 +25,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
public void testHyphenationCompoundWordsDA() throws Exception {
@@ -176,15 +174,15 @@ public class TestCompoundWordTokenFilter
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
- TermAttribute termAtt = tf.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
assertTrue(tf.incrementToken());
- assertEquals("Rindfleischüberwachungsgesetz", termAtt.term());
+ assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
- assertEquals("Rind", termAtt.term());
+ assertEquals("Rind", termAtt.toString());
wsTokenizer.reset(new StringReader("Rindfleischüberwachungsgesetz"));
tf.reset();
assertTrue(tf.incrementToken());
- assertEquals("Rindfleischüberwachungsgesetz", termAtt.term());
+ assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
private Reader getHyphenationReader() throws Exception {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java Tue Jun 1 11:46:54 2010
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
@@ -50,9 +51,9 @@ public class TestElision extends BaseTok
private List<String> filter(TokenFilter filter) throws IOException {
List<String> tas = new ArrayList<String>();
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
while (filter.incrementToken()) {
- tas.add(termAtt.term());
+ tas.add(termAtt.toString());
}
return tas;
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -41,8 +41,6 @@ public class TestPrefixAndSuffixAwareTok
private static Token createToken(String term, int start, int offset)
{
- Token token = new Token(start, offset);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset);
}
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -52,8 +52,6 @@ public class TestPrefixAwareTokenFilter
private static Token createToken(String term, int start, int offset)
{
- Token token = new Token(start, offset);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset);
}
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java Tue Jun 1 11:46:54 2010
@@ -22,14 +22,14 @@ import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestSingleTokenTokenFilter extends LuceneTestCase {
public void test() throws IOException {
Token token = new Token();
SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
- AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(TermAttribute.class);
+ AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(CharTermAttribute.class);
assertTrue(tokenAtt instanceof Token);
ts.reset();
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=950026&r1=950025&r2=950026&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Tue Jun 1 11:46:54 2010
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.paylo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.LuceneTestCase;
@@ -32,7 +32,7 @@ public class DelimitedPayloadTokenFilter
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
@@ -70,7 +70,7 @@ public class DelimitedPayloadTokenFilter
public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
@@ -88,7 +88,7 @@ public class DelimitedPayloadTokenFilter
public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
@@ -104,10 +104,10 @@ public class DelimitedPayloadTokenFilter
}
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
- TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
assertTrue(stream.incrementToken());
- assertEquals(expected, termAtt.term());
+ assertEquals(expected, termAtt.toString());
Payload payload = payloadAtt.getPayload();
if (payload != null) {
assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
@@ -121,9 +121,9 @@ public class DelimitedPayloadTokenFilter
}
- void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+ void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
assertTrue(stream.incrementToken());
- assertEquals(expected, termAtt.term());
+ assertEquals(expected, termAtt.toString());
Payload payload = payAtt.getPayload();
if (payload != null) {
assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);