You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2009/09/11 08:12:14 UTC
svn commit: r813671 - in /lucene/java/trunk: ./
contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/ contr...
Author: uschindler
Date: Fri Sep 11 06:12:13 2009
New Revision: 813671
URL: http://svn.apache.org/viewvc?rev=813671&view=rev
Log:
LUCENE-1906: Fix backwards-compatibility problems with CharStream and Tokenizers that have a custom reset(Reader) method.
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/common-build.xml
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/CharFilter.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Sep 11 06:12:13 2009
@@ -350,10 +350,12 @@
a top level reader and docID.
(Shai Erera, Chris Hostetter, Martin Ruckli, Mark Miller via Mike McCandless)
- * LUCENE-1466: Changed Tokenizer.input to be a CharStream; added
- CharFilter and MappingCharFilter, which allows chaining & mapping
- of characters before tokenizers run. (Koji Sekiguchi via Mike
- McCandless)
+ * LUCENE-1466, LUCENE-1906: Added CharFilter and MappingCharFilter, which allow
+ chaining & mapping of characters before tokenizers run. CharStream (subclass of
+ Reader) is the base class for custom java.io.Readers that support offset
+ correction. Tokenizers got an additional method correctOffset() that is passed
+ down to the underlying CharStream if input is a subclass of CharStream/-Filter.
+ (Koji Sekiguchi via Mike McCandless, Uwe Schindler)
* LUCENE-1703: Add IndexWriter.waitForMerges. (Tim Smith via Mike
McCandless)
Modified: lucene/java/trunk/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/common-build.xml?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/common-build.xml (original)
+++ lucene/java/trunk/common-build.xml Fri Sep 11 06:12:13 2009
@@ -42,7 +42,7 @@
<property name="Name" value="Lucene"/>
<property name="dev.version" value="2.9"/>
<property name="version" value="${dev.version}"/>
- <property name="compatibility.tag" value="lucene_2_4_back_compat_tests_20090903"/>
+ <property name="compatibility.tag" value="lucene_2_4_back_compat_tests_20090911"/>
<property name="spec.version" value="${version}"/>
<property name="year" value="2000-${current.year}"/>
<property name="final.name" value="lucene-${name}-${version}"/>
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Fri Sep 11 06:12:13 2009
@@ -285,7 +285,7 @@
if (length > 0) {
termAtt.setTermBuffer(buffer, 0, length);
- offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start+length));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]);
return true;
} else if (dataLen == -1) {
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Fri Sep 11 06:12:13 2009
@@ -104,7 +104,7 @@
//System.out.println(new String(buffer, 0,
//length));
termAtt.setTermBuffer(buffer, 0, length);
- offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start+length));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
}
else
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Fri Sep 11 06:12:13 2009
@@ -207,7 +207,7 @@
int start = side == Side.FRONT ? 0 : inLen - gramSize;
int end = start + gramSize;
termAtt.setTermBuffer(inStr, start, gramSize);
- offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(end));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(end));
gramSize++;
return true;
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Fri Sep 11 06:12:13 2009
@@ -124,7 +124,7 @@
int oldPos = pos;
pos++;
termAtt.setTermBuffer(inStr, oldPos, gramSize);
- offsetAtt.setOffset(input.correctOffset(oldPos), input.correctOffset(oldPos+gramSize));
+ offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize));
return true;
}
Modified: lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Fri Sep 11 06:12:13 2009
@@ -116,7 +116,7 @@
return false;
else {
termAtt.setTermBuffer(buffer.toString());
- offsetAtt.setOffset(input.correctOffset(tokenStart), input.correctOffset(tokenEnd));
+ offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
typeAtt.setType("sentence");
return true;
}
Modified: lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Fri Sep 11 06:12:13 2009
@@ -202,7 +202,7 @@
return false;
termAtt.setTermBuffer(snippet, startTerm, lenTerm);
- offsetAtt.setOffset(startOffset, startOffset + lenTerm);
+ offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm));
return true;
}
Modified: lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (original)
+++ lucene/java/trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java Fri Sep 11 06:12:13 2009
@@ -17,7 +17,6 @@
package org.apache.lucene.wikipedia.analysis;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
@@ -127,10 +126,6 @@
private TermAttribute termAtt;
private FlagsAttribute flagsAtt;
- void setInput(Reader reader) {
- this.input = CharReader.get(reader);
- }
-
/**
* Creates a new instance of the {@link WikipediaTokenizer}. Attaches the
* <code>input</code> to a newly created JFlex scanner.
@@ -267,7 +262,7 @@
//trim the buffer
String s = buffer.toString().trim();
termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
- offsetAtt.setOffset(input.correctOffset(theStart), input.correctOffset(theStart + s.length()));
+ offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
@@ -305,7 +300,7 @@
//trim the buffer
String s = buffer.toString().trim();
termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
- offsetAtt.setOffset(input.correctOffset(theStart), input.correctOffset(theStart + s.length()));
+ offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
@@ -318,7 +313,7 @@
private void setupToken() {
scanner.getText(termAtt);
final int start = scanner.yychar();
- offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start + termAtt.termLength()));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.termLength()));
}
/*
@@ -332,7 +327,7 @@
}
public void reset(Reader reader) throws IOException {
- setInput(reader);
+ super.reset(reader);
reset();
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java Fri Sep 11 06:12:13 2009
@@ -43,7 +43,7 @@
/** Retrieve the corrected offset. Note that this method
* is slow, if you correct positions far before the most
* recently added position, as it's a simple linear
- * searhc backwards through all offset corrections added
+ * search backwards through all offset corrections added
* by {@link #addOffCorrectMap}. */
protected int correct(int currentOff) {
if (pcmList == null || pcmList.isEmpty()) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharFilter.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharFilter.java Fri Sep 11 06:12:13 2009
@@ -21,6 +21,9 @@
/**
* Subclasses of CharFilter can be chained to filter CharStream.
+ * They can be used as {@link java.io.Reader} with additional offset
+ * correction. {@link Tokenizer}s will automatically use {@link #correctOffset}
+ * if a CharFilter/CharStream subclass is used.
*
* @version $Id$
*
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java Fri Sep 11 06:12:13 2009
@@ -20,12 +20,11 @@
import java.io.Reader;
/**
- * CharStream adds <a
- * href="CharStream.html#correctOffset(int)">correctOffset</a>
- * functionality over Reader. All Tokenizers accept a
- * CharStream as input, which enables arbitrary character
- * based filtering before tokenization. The {@link
- * #correctOffset} method fixed offsets to account for
+ * CharStream adds {@link #correctOffset}
+ * functionality over {@link Reader}. All Tokenizers accept a
+ * CharStream instead of {@link Reader} as input, which enables
+ * arbitrary character based filtering before tokenization.
+ * The {@link #correctOffset} method fixes offsets to account for
* removal or insertion of characters, so that the offsets
* reported in the tokens match the character offsets of the
* original Reader.
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharTokenizer.java Fri Sep 11 06:12:13 2009
@@ -104,13 +104,13 @@
}
termAtt.setTermLength(length);
- offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start+length));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
}
public final void end() {
// set final offset
- int finalOffset = input.correctOffset(offset);
+ int finalOffset = correctOffset(offset);
offsetAtt.setOffset(finalOffset, finalOffset);
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordTokenizer.java Fri Sep 11 06:12:13 2009
@@ -76,8 +76,8 @@
buffer = termAtt.resizeTermBuffer(1+buffer.length);
}
termAtt.setTermLength(upto);
- finalOffset = input.correctOffset(upto);
- offsetAtt.setOffset(input.correctOffset(0), finalOffset);
+ finalOffset = correctOffset(upto);
+ offsetAtt.setOffset(correctOffset(0), finalOffset);
return true;
}
return false;
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java Fri Sep 11 06:12:13 2009
@@ -18,6 +18,7 @@
package org.apache.lucene.analysis;
import java.io.IOException;
+import java.io.Reader;
import java.util.LinkedList;
/**
@@ -35,11 +36,18 @@
private int charPointer;
private int nextCharCounter;
+ /** Default constructor that takes a {@link CharStream}. */
public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
super(in);
this.normMap = normMap;
}
+ /** Easy-use constructor that takes a {@link Reader}. */
+ public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
+ super(CharReader.get(in));
+ this.normMap = normMap;
+ }
+
public int read() throws IOException {
while(true) {
if (replacement != null && charPointer < replacement.length()) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java Fri Sep 11 06:12:13 2009
@@ -40,7 +40,7 @@
public abstract class Tokenizer extends TokenStream {
/** The text source for this Tokenizer. */
- protected CharStream input;
+ protected Reader input;
/** Construct a tokenizer with null input. */
protected Tokenizer() {}
@@ -49,11 +49,6 @@
protected Tokenizer(Reader input) {
this.input = CharReader.get(input);
}
-
- /** Construct a token stream processing the given input. */
- protected Tokenizer(CharStream input) {
- this.input = input;
- }
/** Construct a tokenizer with null input using the given AttributeFactory. */
protected Tokenizer(AttributeFactory factory) {
@@ -65,12 +60,6 @@
super(factory);
this.input = CharReader.get(input);
}
-
- /** Construct a token stream processing the given input using the given AttributeFactory. */
- protected Tokenizer(AttributeFactory factory, CharStream input) {
- super(factory);
- this.input = input;
- }
/** Construct a token stream processing the given input using the given AttributeSource. */
protected Tokenizer(AttributeSource source) {
@@ -83,28 +72,25 @@
this.input = CharReader.get(input);
}
- /** Construct a token stream processing the given input using the given AttributeSource. */
- protected Tokenizer(AttributeSource source, CharStream input) {
- super(source);
- this.input = input;
- }
-
/** By default, closes the input Reader. */
public void close() throws IOException {
input.close();
}
+
+ /** Return the corrected offset. If {@link #input} is a {@link CharStream} subclass
+ * this method calls {@link CharStream#correctOffset}, else returns <code>currentOff</code>.
+ * @param currentOff offset as seen in the output
+ * @return corrected offset based on the input
+ * @see CharStream#correctOffset
+ */
+ protected final int correctOffset(int currentOff) {
+ return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
+ }
/** Expert: Reset the tokenizer to a new reader. Typically, an
* analyzer (in its reusableTokenStream method) will use
* this to re-use a previously created tokenizer. */
public void reset(Reader input) throws IOException {
- this.input = CharReader.get(input);
- }
-
- /** Expert: Reset the tokenizer to a new CharStream. Typically, an
- * analyzer (in its reusableTokenStream method) will use
- * this to re-use a previously created tokenizer. */
- public void reset(CharStream input) throws IOException {
this.input = input;
}
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Fri Sep 11 06:12:13 2009
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -92,10 +91,6 @@
*/
private boolean replaceInvalidAcronym;
- void setInput(Reader reader) {
- input = CharReader.get(reader);
- }
-
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
/** Set the max allowed token length. Any token longer
@@ -152,7 +147,7 @@
private void init(Reader input, boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
- setInput(input);
+ this.input = input;
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
@@ -186,7 +181,7 @@
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
- offsetAtt.setOffset(input.correctOffset(start), input.correctOffset(start+termAtt.termLength()));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.termLength()));
// This 'if' should be removed in the next release. For now, it converts
// invalid acronyms to HOST. When removed, only the 'else' part should
// remain.
@@ -210,7 +205,7 @@
public final void end() {
// set final offset
- int finalOffset = input.correctOffset(scanner.yychar() + scanner.yylength());
+ int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
}
@@ -237,7 +232,7 @@
}
public void reset(Reader reader) throws IOException {
- setInput(reader);
+ super.reset(reader);
reset();
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java?rev=813671&r1=813670&r2=813671&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java Fri Sep 11 06:12:13 2009
@@ -41,7 +41,7 @@
}
public void testReaderReset() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
char[] buf = new char[10];
int len = cs.read(buf, 0, 10);
assertEquals( 1, len );
@@ -57,55 +57,55 @@
}
public void testNothingChange() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
}
public void test1to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
}
public void test1to2() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "j" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
}
public void test1to3() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "k" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
}
public void test2to4() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "ll" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
}
public void test2to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "aa" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
}
public void test3to1() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "bbb" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
}
public void test4to2() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "cccc" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
}
public void test5to0() throws Exception {
- CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "empty" ) ) );
+ CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
TokenStream ts = new WhitespaceTokenizer( cs );
assertTokenStreamContents(ts, new String[0]);
}