You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/05/22 16:18:55 UTC

svn commit: r1125972 - in /lucene/dev/trunk/lucene/src: test-framework/org/apache/lucene/analysis/MockTokenizer.java test/org/apache/lucene/analysis/TestToken.java

Author: mikemccand
Date: Sun May 22 14:18:55 2011
New Revision: 1125972

URL: http://svn.apache.org/viewvc?rev=1125972&view=rev
Log:
Allow MockTokenizer to take a max token length; defaults to Integer.MAX_VALUE (= no change in behavior)

Modified:
    lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java

Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1125972&r1=1125971&r2=1125972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Sun May 22 14:18:55 2011
@@ -22,6 +22,7 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
 
@@ -53,6 +54,8 @@ public class MockTokenizer extends Token
 
   private final CharacterRunAutomaton runAutomaton;
   private final boolean lowerCase;
+  private final int maxTokenLength;
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = Integer.MAX_VALUE;
   private int state;
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -74,20 +77,21 @@ public class MockTokenizer extends Token
   private State streamState = State.CLOSE;
   private boolean enableChecks = true;
   
-  public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+  public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
     super(factory, input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
     this.streamState = State.SETREADER;
+    this.maxTokenLength = maxTokenLength;
+  }
+
+  public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, runAutomaton, lowerCase, maxTokenLength);
   }
 
   public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(input);
-    this.runAutomaton = runAutomaton;
-    this.lowerCase = lowerCase;
-    this.state = runAutomaton.getInitialState();
-    this.streamState = State.SETREADER;
+    this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
   }
   
   @Override
@@ -107,6 +111,9 @@ public class MockTokenizer extends Token
           for (int i = 0; i < chars.length; i++)
             termAtt.append(chars[i]);
           endOffset = off;
+          if (termAtt.length() >= maxTokenLength) {
+            break;
+          }
           cp = readCodePoint();
         } while (cp >= 0 && isTokenChar(cp));
         offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java?rev=1125972&r1=1125971&r2=1125972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java Sun May 22 14:18:55 2011
@@ -225,7 +225,7 @@ public class TestToken extends LuceneTes
   }
 
   public void testTokenAttributeFactory() throws Exception {
-    TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false);
+    TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
     
     assertTrue("SenselessAttribute is not implemented by SenselessAttributeImpl",
       ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);