You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/05/22 16:18:55 UTC
svn commit: r1125972 - in /lucene/dev/trunk/lucene/src:
test-framework/org/apache/lucene/analysis/MockTokenizer.java
test/org/apache/lucene/analysis/TestToken.java
Author: mikemccand
Date: Sun May 22 14:18:55 2011
New Revision: 1125972
URL: http://svn.apache.org/viewvc?rev=1125972&view=rev
Log:
Allow MockTokenizer to take a max token length; defaults to Integer.MAX_VALUE (= no change in behavior)
Modified:
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1125972&r1=1125971&r2=1125972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Sun May 22 14:18:55 2011
@@ -22,6 +22,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
@@ -53,6 +54,8 @@ public class MockTokenizer extends Token
private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase;
+ private final int maxTokenLength;
+ public static final int DEFAULT_MAX_TOKEN_LENGTH = Integer.MAX_VALUE;
private int state;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -74,20 +77,21 @@ public class MockTokenizer extends Token
private State streamState = State.CLOSE;
private boolean enableChecks = true;
- public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+ public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
super(factory, input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
this.streamState = State.SETREADER;
+ this.maxTokenLength = maxTokenLength;
+ }
+
+ public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
+ this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, runAutomaton, lowerCase, maxTokenLength);
}
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(input);
- this.runAutomaton = runAutomaton;
- this.lowerCase = lowerCase;
- this.state = runAutomaton.getInitialState();
- this.streamState = State.SETREADER;
+ this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
}
@Override
@@ -107,6 +111,9 @@ public class MockTokenizer extends Token
for (int i = 0; i < chars.length; i++)
termAtt.append(chars[i]);
endOffset = off;
+ if (termAtt.length() >= maxTokenLength) {
+ break;
+ }
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java?rev=1125972&r1=1125971&r2=1125972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java Sun May 22 14:18:55 2011
@@ -225,7 +225,7 @@ public class TestToken extends LuceneTes
}
public void testTokenAttributeFactory() throws Exception {
- TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false);
+ TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
assertTrue("SenselessAttribute is not implemented by SenselessAttributeImpl",
ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);