You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/06/01 12:35:16 UTC
svn commit: r950008 [4/4] - in /lucene/dev/trunk:
lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/contrib/highlighter/src/test/org/apache/lucene/search...
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java Tue Jun 1 10:35:13 2010
@@ -404,7 +404,7 @@ public class TestSynonymFilter extends B
else {
clearAttributes();
Token token = tokens[index++];
- termAtt.setEmpty().append(token.term());
+ termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java Tue Jun 1 10:35:13 2010
@@ -20,30 +20,20 @@ package org.apache.lucene.analysis.wikip
import java.io.StringReader;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*;
/**
- *
- *
+ * Basic Tests for {@link WikipediaTokenizer}
**/
public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
protected static final String LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]";
- public WikipediaTokenizerTest(String s) {
- super(s);
- }
-
public void testSimple() throws Exception {
String text = "This is a [[Category:foo]]";
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text));
@@ -51,216 +41,85 @@ public class WikipediaTokenizerTest exte
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
- new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
+ new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY },
new int[] { 1, 1, 1, 1, },
text.length());
}
public void testHandwritten() throws Exception {
- //make sure all tokens are in only one type
- String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " +
- "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] " +
- "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " +
- " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. " +
- "==heading== ===sub head=== followed by some text [[Category:blah| ]] " +
- "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed." +
- "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" +
- " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" +
- " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
- Map<String,String> tcm = new HashMap<String,String>();//map tokens to types
- tcm.put("link", WikipediaTokenizer.INTERNAL_LINK);
- tcm.put("display", WikipediaTokenizer.INTERNAL_LINK);
- tcm.put("info", WikipediaTokenizer.INTERNAL_LINK);
-
- tcm.put("http://lucene.apache.org", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("http://foo.boo.com/test/test/", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("http://foo.boo.com/test/test/test.html", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("http://foo.boo.com/test/test/test.html?g=b&c=d", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("Test", WikipediaTokenizer.EXTERNAL_LINK);
-
- //alphanums
- tcm.put("This", "<ALPHANUM>");
- tcm.put("is", "<ALPHANUM>");
- tcm.put("a", "<ALPHANUM>");
- tcm.put("Category", "<ALPHANUM>");
- tcm.put("linked", "<ALPHANUM>");
- tcm.put("parens", "<ALPHANUM>");
- tcm.put("external", "<ALPHANUM>");
- tcm.put("URL", "<ALPHANUM>");
- tcm.put("and", "<ALPHANUM>");
- tcm.put("period", "<ALPHANUM>");
- tcm.put("Here", "<ALPHANUM>");
- tcm.put("Here's", "<APOSTROPHE>");
- tcm.put("here", "<ALPHANUM>");
- tcm.put("Johnny", "<ALPHANUM>");
- tcm.put("followed", "<ALPHANUM>");
- tcm.put("by", "<ALPHANUM>");
- tcm.put("text", "<ALPHANUM>");
- tcm.put("that", "<ALPHANUM>");
- tcm.put("but", "<ALPHANUM>");
- tcm.put("never", "<ALPHANUM>");
- tcm.put("closed", "<ALPHANUM>");
- tcm.put("goes", "<ALPHANUM>");
- tcm.put("for", "<ALPHANUM>");
- tcm.put("this", "<ALPHANUM>");
- tcm.put("an", "<ALPHANUM>");
- tcm.put("some", "<ALPHANUM>");
- tcm.put("martian", "<ALPHANUM>");
- tcm.put("code", "<ALPHANUM>");
-
- tcm.put("foo", WikipediaTokenizer.CATEGORY);
- tcm.put("bar", WikipediaTokenizer.CATEGORY);
- tcm.put("none", WikipediaTokenizer.CATEGORY);
- tcm.put("withstanding", WikipediaTokenizer.CATEGORY);
- tcm.put("blah", WikipediaTokenizer.CATEGORY);
- tcm.put("ital", WikipediaTokenizer.CATEGORY);
- tcm.put("cat", WikipediaTokenizer.CATEGORY);
-
- tcm.put("italics", WikipediaTokenizer.ITALICS);
- tcm.put("more", WikipediaTokenizer.ITALICS);
- tcm.put("bold", WikipediaTokenizer.BOLD);
- tcm.put("same", WikipediaTokenizer.BOLD);
- tcm.put("five", WikipediaTokenizer.BOLD_ITALICS);
- tcm.put("and2", WikipediaTokenizer.BOLD_ITALICS);
- tcm.put("quotes", WikipediaTokenizer.BOLD_ITALICS);
-
- tcm.put("heading", WikipediaTokenizer.HEADING);
- tcm.put("sub", WikipediaTokenizer.SUB_HEADING);
- tcm.put("head", WikipediaTokenizer.SUB_HEADING);
+ // make sure all tokens are in only one type
+ String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] "
+ + "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] "
+ + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' "
+ + " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. "
+ + "==heading== ===sub head=== followed by some text [[Category:blah| ]] "
+ + "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed."
+ + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this"
+ + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]"
+ + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
- tcm.put("Citation", WikipediaTokenizer.CITATION);
-
- tcm.put("3.25", "<NUM>");
- tcm.put("3.50", "<NUM>");
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
- int count = 0;
- int numItalics = 0;
- int numBoldItalics = 0;
- int numCategory = 0;
- int numCitation = 0;
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-
- while (tf.incrementToken()) {
- String tokText = termAtt.term();
- //System.out.println("Text: " + tokText + " Type: " + token.type());
- String expectedType = tcm.get(tokText);
- assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
- assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
- count++;
- if (typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true){
- numItalics++;
- } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS) == true){
- numBoldItalics++;
- } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true){
- numCategory++;
- }
- else if (typeAtt.type().equals(WikipediaTokenizer.CITATION) == true){
- numCitation++;
- }
- }
- assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size());
- assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4);
- assertTrue(numBoldItalics + " does not equal: " + 3 + " for numBoldItalics", numBoldItalics == 3);
- assertTrue(numCategory + " does not equal: " + 10 + " for numCategory", numCategory == 10);
- assertTrue(numCitation + " does not equal: " + 1 + " for numCitation", numCitation == 1);
+ assertTokenStreamContents(tf,
+ new String[] {"link", "This", "is", "a",
+ "foo", "Category", "This", "is", "a", "linked", "bar", "none",
+ "withstanding", "Category", "This", "is", "parens", "This", "is", "a",
+ "link", "This", "is", "an", "external", "URL",
+ "http://lucene.apache.org", "Here", "is", "italics", "and", "more",
+ "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link",
+ "display", "info", "This", "is", "a", "period", "Here", "is", "3.25",
+ "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub",
+ "head", "followed", "by", "some", "text", "blah", "ital", "cat",
+ "here", "is", "some", "that", "is", "italics", "foo", "but", "is",
+ "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo",
+ "and", "this", "http://foo.boo.com/test/test/", "Test", "Test",
+ "http://foo.boo.com/test/test/test.html", "Test", "Test",
+ "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test",
+ "Citation", "martian", "code"},
+ new String[] {INTERNAL_LINK,
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY, "<ALPHANUM>",
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", CATEGORY,
+ CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK,
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+ EXTERNAL_LINK_URL, "<ALPHANUM>", "<ALPHANUM>", ITALICS, "<ALPHANUM>",
+ ITALICS, ITALICS, BOLD, "<ALPHANUM>", BOLD_ITALICS, BOLD_ITALICS,
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", INTERNAL_LINK, INTERNAL_LINK,
+ INTERNAL_LINK, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+ "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>", "<ALPHANUM>",
+ "<ALPHANUM>", "<NUM>", "<APOSTROPHE>", "<ALPHANUM>", HEADING,
+ SUB_HEADING, SUB_HEADING, "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+ "<ALPHANUM>", CATEGORY, CATEGORY, CATEGORY, "<ALPHANUM>", "<ALPHANUM>",
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", ITALICS, CATEGORY,
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD, CATEGORY,
+ "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", BOLD_ITALICS, CATEGORY,
+ "<ALPHANUM>", "<ALPHANUM>", EXTERNAL_LINK_URL, EXTERNAL_LINK,
+ EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK,
+ EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION,
+ "<ALPHANUM>", "<ALPHANUM>"});
}
public void testLinkPhrases() throws Exception {
-
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES));
checkLinkPhrases(tf);
-
}
private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException {
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- //The link, and here should be at the same position for phrases to work
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "again",
- termAtt.term().equals("again") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "click",
- termAtt.term().equals("click") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org",
- termAtt.term().equals("http://lucene.apache.org") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "again",
- termAtt.term().equals("again") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a",
- termAtt.term().equals("a") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "b",
- termAtt.term().equals("b") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "c",
- termAtt.term().equals("c") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "d",
- termAtt.term().equals("d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertFalse(tf.incrementToken());
+ assertTokenStreamContents(tf,
+ new String[] { "click", "link", "here", "again", "click",
+ "http://lucene.apache.org", "here", "again", "a", "b", "c", "d" },
+ new int[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1 });
}
public void testLinks() throws Exception {
String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news",
- termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
- tf.incrementToken();//skip here
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c",
- termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
- tf.incrementToken();//skip here
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c",
- termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-
- assertTrue(tf.incrementToken());
- assertFalse(tf.incrementToken());
+ assertTokenStreamContents(tf,
+ new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here",
+ "http://lucene.apache.org/java/docs/index.html?b=c", "here",
+ "https://lucene.apache.org/java/docs/index.html?b=c", "here" },
+ new String[] { EXTERNAL_LINK_URL, EXTERNAL_LINK,
+ EXTERNAL_LINK_URL, EXTERNAL_LINK,
+ EXTERNAL_LINK_URL, EXTERNAL_LINK, });
}
public void testLucene1133() throws Exception {
@@ -272,73 +131,13 @@ public class WikipediaTokenizerTest exte
checkLinkPhrases(tf);
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks);
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
- OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a b c d",
- termAtt.term().equals("a b c d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "e f g",
- termAtt.term().equals("e f g") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "there",
- termAtt.term().equals("there") == true);
-
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics here",
- termAtt.term().equals("italics here") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "something",
- termAtt.term().equals("something") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "more italics",
- termAtt.term().equals("more italics") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "h i j",
- termAtt.term().equals("h i j") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
- assertFalse(tf.incrementToken());
+ assertTokenStreamContents(tf,
+ new String[] { "a b c d", "e f g", "link", "here", "link",
+ "there", "italics here", "something", "more italics", "h i j" },
+ new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
+ new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
+ new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+ );
}
public void testBoth() throws Exception {
@@ -348,211 +147,26 @@ public class WikipediaTokenizerTest exte
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
//should output all the individual tokens plus the untokenized tokens as well. Untokenized tokens
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
- PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
- OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
+ assertTokenStreamContents(tf,
+ new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
+ "link", "here", "link", "there", "italics here", "italics", "here",
+ "something", "more italics", "more", "italics", "h i j", "h", "i", "j" },
+ new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 },
+ new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
+ new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
+ );
+
+ // now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase?
+ tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
+ int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0,
+ 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 };
FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a b c d",
- termAtt.term().equals("a b c d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a",
- termAtt.term().equals("a") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", flagsAtt.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 12, offsetAtt.endOffset() == 12);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "b",
- termAtt.term().equals("b") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 13, offsetAtt.startOffset() == 13);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 14, offsetAtt.endOffset() == 14);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "c",
- termAtt.term().equals("c") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 15, offsetAtt.startOffset() == 15);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 16, offsetAtt.endOffset() == 16);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "d",
- termAtt.term().equals("d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 17, offsetAtt.startOffset() == 17);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
-
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "e f g",
- termAtt.term().equals("e f g") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "e",
- termAtt.term().equals("e") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 33, offsetAtt.endOffset() == 33);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "f",
- termAtt.term().equals("f") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 34, offsetAtt.startOffset() == 34);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 35, offsetAtt.endOffset() == 35);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "g",
- termAtt.term().equals("g") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 36, offsetAtt.startOffset() == 36);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "there",
- termAtt.term().equals("there") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics here",
- termAtt.term().equals("italics here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics",
- termAtt.term().equals("italics") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 78, offsetAtt.endOffset() == 78);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 79, offsetAtt.startOffset() == 79);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "something",
- termAtt.term().equals("something") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "more italics",
- termAtt.term().equals("more italics") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "more",
- termAtt.term().equals("more") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 102, offsetAtt.endOffset() == 102);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics",
- termAtt.term().equals("italics") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 103, offsetAtt.startOffset() == 103);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "h i j",
- termAtt.term().equals("h i j") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "h",
- termAtt.term().equals("h") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 125, offsetAtt.endOffset() == 125);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "i",
- termAtt.term().equals("i") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 128, offsetAtt.startOffset() == 128);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 129, offsetAtt.endOffset() == 129);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "j",
- termAtt.term().equals("j") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 132, offsetAtt.startOffset() == 132);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
+ tf.reset();
+ for (int i = 0; i < expectedFlags.length; i++) {
+ assertTrue(tf.incrementToken());
+ assertEquals("flags " + i, expectedFlags[i], flagsAtt.getFlags());
+ }
assertFalse(tf.incrementToken());
+ tf.close();
}
}
Modified: lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Tue Jun 1 10:35:13 2010
@@ -23,7 +23,7 @@ import com.ibm.icu.text.RawCollationKey;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;
import java.io.IOException;
@@ -70,7 +70,7 @@ import java.io.IOException;
public final class ICUCollationKeyFilter extends TokenFilter {
private Collator collator = null;
private RawCollationKey reusableKey = new RawCollationKey();
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
*
@@ -80,23 +80,22 @@ public final class ICUCollationKeyFilter
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
super(input);
this.collator = collator;
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- char[] termBuffer = termAtt.termBuffer();
- String termText = new String(termBuffer, 0, termAtt.termLength());
+ char[] termBuffer = termAtt.buffer();
+ String termText = new String(termBuffer, 0, termAtt.length());
collator.getRawCollationKey(termText, reusableKey);
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
reusableKey.bytes, 0, reusableKey.size);
if (encodedLength > termBuffer.length) {
- termAtt.resizeTermBuffer(encodedLength);
+ termAtt.resizeBuffer(encodedLength);
}
- termAtt.setTermLength(encodedLength);
+ termAtt.setLength(encodedLength);
IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
- termAtt.termBuffer(), 0, encodedLength);
+ termAtt.buffer(), 0, encodedLength);
return true;
} else {
return false;
Modified: lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Tue Jun 1 10:35:13 2010
@@ -21,8 +21,8 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -44,29 +44,20 @@ public final class SentenceTokenizer ext
private int tokenStart = 0, tokenEnd = 0;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public SentenceTokenizer(Reader reader) {
super(reader);
- init();
}
public SentenceTokenizer(AttributeSource source, Reader reader) {
super(source, reader);
- init();
}
public SentenceTokenizer(AttributeFactory factory, Reader reader) {
super(factory, reader);
- init();
- }
-
- private void init() {
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
@@ -112,7 +103,7 @@ public final class SentenceTokenizer ext
if (buffer.length() == 0)
return false;
else {
- termAtt.setTermBuffer(buffer.toString());
+ termAtt.setEmpty().append(buffer);
offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
typeAtt.setType("sentence");
return true;
Modified: lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java Tue Jun 1 10:35:13 2010
@@ -24,8 +24,8 @@ import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
@@ -40,9 +40,9 @@ public final class WordTokenFilter exten
private List<SegToken> tokenBuffer;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
/**
* Construct a new WordTokenizer.
@@ -52,9 +52,6 @@ public final class WordTokenFilter exten
public WordTokenFilter(TokenStream in) {
super(in);
this.wordSegmenter = new WordSegmenter();
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
@@ -63,7 +60,7 @@ public final class WordTokenFilter exten
// there are no remaining tokens from the current sentence... are there more sentences?
if (input.incrementToken()) {
// a new sentence is available: process it.
- tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
+ tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
tokenIter = tokenBuffer.iterator();
/*
* it should not be possible to have a sentence with 0 words, check just in case.
@@ -79,7 +76,7 @@ public final class WordTokenFilter exten
clearAttributes();
// There are remaining tokens from the current sentence, return the next one.
SegToken nextWord = tokenIter.next();
- termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
+ termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
typeAtt.setType("word");
return true;
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java Tue Jun 1 10:35:13 2010
@@ -150,7 +150,7 @@ public abstract class BufferedTokenStrea
return null;
} else {
Token token = new Token();
- token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
@@ -163,7 +163,7 @@ public abstract class BufferedTokenStrea
/** old api emulation for back compat */
private boolean writeToken(Token token) throws IOException {
clearAttributes();
- termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Tue Jun 1 10:35:13 2010
@@ -163,12 +163,12 @@ public abstract class AnalysisRequestHan
while (tokenStream.incrementToken()) {
Token token = new Token();
if (termAtt != null) {
- token.setTermBuffer(termAtt.toString());
+ token.setEmpty().append(termAtt);
}
if (bytesAtt != null) {
bytesAtt.toBytesRef(bytes);
// TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
- token.setTermBuffer(bytes.utf8ToString());
+ token.setEmpty().append(bytes.utf8ToString());
}
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
@@ -208,10 +208,10 @@ public abstract class AnalysisRequestHan
for (Token token : tokens) {
NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
- String text = fieldType.indexedToReadable(token.term());
+ String text = fieldType.indexedToReadable(token.toString());
tokenNamedList.add("text", text);
- if (!text.equals(token.term())) {
- tokenNamedList.add("raw_text", token.term());
+ if (!text.equals(token.toString())) {
+ tokenNamedList.add("raw_text", token.toString());
}
tokenNamedList.add("type", token.type());
tokenNamedList.add("start", token.startOffset());
@@ -220,7 +220,7 @@ public abstract class AnalysisRequestHan
position += token.getPositionIncrement();
tokenNamedList.add("position", position);
- if (context.getTermsToMatch().contains(token.term())) {
+ if (context.getTermsToMatch().contains(token.toString())) {
tokenNamedList.add("match", true);
}
@@ -292,7 +292,7 @@ public abstract class AnalysisRequestHan
public boolean incrementToken() throws IOException {
if (tokenIterator.hasNext()) {
Token next = tokenIterator.next();
- termAtt.copyBuffer(next.termBuffer(), 0, next.termLength());
+ termAtt.copyBuffer(next.buffer(), 0, next.length());
typeAtt.setType(next.type());
offsetAtt.setOffset(next.startOffset(), next.endOffset());
flagsAtt.setFlags(next.getFlags());
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java Tue Jun 1 10:35:13 2010
@@ -221,7 +221,7 @@ public class DocumentAnalysisRequestHand
try {
List<Token> tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer());
for (Token token : tokens) {
- termsToMatch.add(token.term());
+ termsToMatch.add(token.toString());
}
} catch (Exception e) {
// ignore analysis exceptions since we are applying arbitrary text to all fields
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java Tue Jun 1 10:35:13 2010
@@ -227,7 +227,7 @@ public class FieldAnalysisRequestHandler
if (queryValue != null && analysisRequest.isShowMatch()) {
List<Token> tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer());
for (Token token : tokens) {
- termsToMatch.add(token.term());
+ termsToMatch.add(token.toString());
}
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Tue Jun 1 10:35:13 2010
@@ -337,10 +337,7 @@ public class SpellCheckComponent extends
// create token
SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
- Token token = new Token();
- token.setTermBuffer(original);
- token.setStartOffset(suggestion.getStartOffset());
- token.setEndOffset(suggestion.getEndOffset());
+ Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
// get top 'count' suggestions out of 'sugQueue.size()' candidates
SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
@@ -382,7 +379,7 @@ public class SpellCheckComponent extends
while (ts.incrementToken()){
Token token = new Token();
- token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
@@ -461,7 +458,7 @@ public class SpellCheckComponent extends
if (hasFreqInfo) {
isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
}
- result.add(new String(inputToken.termBuffer(), 0, inputToken.termLength()), suggestionList);
+ result.add(new String(inputToken.buffer(), 0, inputToken.length()), suggestionList);
}
}
if (hasFreqInfo) {
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java Tue Jun 1 10:35:13 2010
@@ -136,7 +136,7 @@ public abstract class AbstractLuceneSpel
reader = determineReader(reader);
Term term = field != null ? new Term(field, "") : null;
for (Token token : tokens) {
- String tokenText = new String(token.termBuffer(), 0, token.termLength());
+ String tokenText = new String(token.buffer(), 0, token.length());
String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
field != null ? reader : null, //workaround LUCENE-1295
field,
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java Tue Jun 1 10:35:13 2010
@@ -113,7 +113,7 @@ public class SpellingQueryConverter exte
stream.reset();
while (stream.incrementToken()) {
Token token = new Token();
- token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setStartOffset(matcher.start());
token.setEndOffset(matcher.end());
token.setFlags(flagsAtt.getFlags());
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java Tue Jun 1 10:35:13 2010
@@ -35,9 +35,9 @@ public class TestBufferedTokenStream ext
public static class AB_Q_Stream extends BufferedTokenStream {
public AB_Q_Stream(TokenStream input) {super(input);}
protected Token process(Token t) throws IOException {
- if ("A".equals(new String(t.termBuffer(), 0, t.termLength()))) {
+ if ("A".equals(new String(t.buffer(), 0, t.length()))) {
Token t2 = read();
- if (t2!=null && "B".equals(new String(t2.termBuffer(), 0, t2.termLength()))) t.setTermBuffer("Q");
+ if (t2!=null && "B".equals(new String(t2.buffer(), 0, t2.length()))) t.setEmpty().append("Q");
if (t2!=null) pushBack(t2);
}
return t;
@@ -48,8 +48,8 @@ public class TestBufferedTokenStream ext
public static class AB_AAB_Stream extends BufferedTokenStream {
public AB_AAB_Stream(TokenStream input) {super(input);}
protected Token process(Token t) throws IOException {
- if ("A".equals(new String(t.termBuffer(), 0, t.termLength())) &&
- "B".equals(new String(peek(1).termBuffer(), 0, peek(1).termLength())))
+ if ("A".equals(new String(t.buffer(), 0, t.length())) &&
+ "B".equals(new String(peek(1).buffer(), 0, peek(1).length())))
write((Token)t.clone());
return t;
}
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java Tue Jun 1 10:35:13 2010
@@ -52,7 +52,7 @@ public class TestRemoveDuplicatesTokenFi
if (toks.hasNext()) {
clearAttributes();
Token tok = toks.next();
- termAtt.setEmpty().append(tok.term());
+ termAtt.setEmpty().append(tok);
offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
posIncAtt.setPositionIncrement(tok.getPositionIncrement());
return true;
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java Tue Jun 1 10:35:13 2010
@@ -262,7 +262,7 @@ public class TestSynonymMap extends Test
Token[] tokens = ((SynonymMap)map.submap.get( src )).synonyms;
boolean inc = false;
for( Token token : tokens ){
- if( exp.equals( new String(token.termBuffer(), 0, token.termLength()) ) )
+ if( exp.equals( new String(token.buffer(), 0, token.length()) ) )
inc = true;
}
assertTrue( inc );
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java Tue Jun 1 10:35:13 2010
@@ -54,7 +54,7 @@ class SimpleQueryConverter extends Spell
ts.reset();
while (ts.incrementToken()){
Token tok = new Token();
- tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+ tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
tok.setFlags(flagsAtt.getFlags());
tok.setPayload(payloadAtt.getPayload());
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java Tue Jun 1 10:35:13 2010
@@ -88,7 +88,7 @@ public class SpellingQueryConverterTest
for (Token token : tokens) {
int start = token.startOffset();
int end = token.endOffset();
- if (!s.substring(start, end).equals(token.term())) return false;
+ if (!s.substring(start, end).equals(token.toString())) return false;
}
return true;
}
Modified: lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp?rev=950008&r1=950007&r2=950008&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp (original)
+++ lucene/dev/trunk/solr/src/webapp/web/admin/analysis.jsp Tue Jun 1 10:35:13 2010
@@ -223,7 +223,7 @@
public boolean incrementToken() throws IOException {
if (iter.hasNext()) {
Token token = iter.next();
- termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());
@@ -267,7 +267,7 @@
break;
else {
Token token = new Token();
- token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setType(typeAtt.type());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setPayload(payloadAtt.getPayload());
@@ -289,13 +289,13 @@
}
public boolean equals(Object o) {
- return ((Tok)o).token.term().equals(token.term());
+ return ((Tok)o).token.toString().equals(token.toString());
}
public int hashCode() {
- return token.term().hashCode();
+ return token.toString().hashCode();
}
public String toString() {
- return token.term();
+ return token.toString();
}
}
@@ -377,7 +377,7 @@
boolean needRaw=false;
int pos=0;
for (Token t : tokens) {
- if (!t.term().equals(ft.indexedToReadable(t.term()))) {
+ if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
needRaw=true;
}
@@ -426,7 +426,7 @@
printRow(out,"term text", arr, new ToStr() {
public String toStr(Object o) {
- return ft.indexedToReadable( ((Tok)o).token.term() );
+ return ft.indexedToReadable( ((Tok)o).token.toString() );
}
}
,true
@@ -438,7 +438,7 @@
printRow(out,"raw text", arr, new ToStr() {
public String toStr(Object o) {
// page is UTF-8, so anything goes.
- return ((Tok)o).token.term();
+ return ((Tok)o).token.toString();
}
}
,true