You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/17 22:16:42 UTC
svn commit: r1104519 [2/2] - in /lucene/dev/trunk: lucene/contrib/
lucene/src/test-framework/org/apache/lucene/analysis/
modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/
modules/analysis/common/src/java/org/apache/lucene/analysi...
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Tue May 17 20:16:40 2011
@@ -19,12 +19,11 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -127,8 +126,8 @@ public class TestWordDelimiterFilter ext
}
public void doSplit(final String input, String... output) throws Exception {
- WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
+ new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf, output);
}
@@ -169,8 +168,8 @@ public class TestWordDelimiterFilter ext
}
public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
- WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), 1,1,0,0,0,1,0,1,stemPossessive, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
+ new StringReader(input), MockTokenizer.KEYWORD, false), 1,1,0,0,0,1,0,1,stemPossessive, null);
assertTokenStreamContents(wdf, output);
}
@@ -216,7 +215,7 @@ public class TestWordDelimiterFilter ext
@Override
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
}
};
@@ -244,7 +243,7 @@ public class TestWordDelimiterFilter ext
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
new LargePosIncTokenFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)),
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
}
};
@@ -276,7 +275,7 @@ public class TestWordDelimiterFilter ext
@Override
public TokenStream tokenStream(String field, Reader reader) {
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), StandardAnalyzer.STOP_WORDS_SET);
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
filter.setEnablePositionIncrements(true);
return new WordDelimiterFilter(filter,
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Tue May 17 20:16:40 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ngram
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -32,7 +33,7 @@ public class EdgeNGramTokenFilterTest ex
@Override
public void setUp() throws Exception {
super.setUp();
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
}
public void testInvalidInput() throws Exception {
@@ -91,7 +92,7 @@ public class EdgeNGramTokenFilterTest ex
}
public void testSmallTokenInStream() throws Exception {
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
+ input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Tue May 17 20:16:40 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ngram
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -32,7 +33,7 @@ public class NGramTokenFilterTest extend
@Override
public void setUp() throws Exception {
super.setUp();
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
}
public void testInvalidInput() throws Exception {
@@ -80,7 +81,7 @@ public class NGramTokenFilterTest extend
}
public void testSmallTokenInStream() throws Exception {
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
+ input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java Tue May 17 20:16:40 2011
@@ -24,8 +24,8 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Tests {@link PatternReplaceCharFilter}
@@ -39,7 +39,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "this is test.";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "this", "is", "test." },
new int[] { 0, 5, 8 },
@@ -52,8 +52,8 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "aa bb cc";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
- assertFalse(ts.incrementToken());
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+ assertTokenStreamContents(ts, new String[] {});
}
// 012345678
@@ -63,7 +63,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "aa bb cc";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2#$3",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa#bb#cc" },
new int[] { 0 },
@@ -78,7 +78,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "aa bb cc dd";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa##bb###cc", "dd" },
new int[] { 0, 9 },
@@ -92,7 +92,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = " a a";
CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "aa" },
new int[] { 1, 4 },
@@ -107,7 +107,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "aa bb cc dd";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa#bb", "dd" },
new int[] { 0, 12 },
@@ -122,7 +122,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = " aa bb cc --- aa bb aa bb cc";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1 $2 $3",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
@@ -137,7 +137,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = " aa bb cc --- aa bb aa. bb aa bb cc";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2", ".",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
@@ -154,7 +154,7 @@ public class TestPatternReplaceCharFilte
CharReader.get( new StringReader( BLOCK ) ) );
cs = new PatternReplaceCharFilter( pattern("bb"), "b", ".", cs );
cs = new PatternReplaceCharFilter( pattern("ccc"), "c", ".", cs );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
new int[] { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 },
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java Tue May 17 20:16:40 2011
@@ -18,8 +18,8 @@
package org.apache.lucene.analysis.pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.StringReader;
import java.util.regex.Pattern;
@@ -32,7 +32,7 @@ public class TestPatternReplaceFilter ex
public void testReplaceAll() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
"-", true);
assertTokenStreamContents(ts,
@@ -42,7 +42,7 @@ public class TestPatternReplaceFilter ex
public void testReplaceFirst() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
"-", false);
assertTokenStreamContents(ts,
@@ -52,7 +52,7 @@ public class TestPatternReplaceFilter ex
public void testStripFirst() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
null, false);
assertTokenStreamContents(ts,
@@ -62,7 +62,7 @@ public class TestPatternReplaceFilter ex
public void testStripAll() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
null, true);
assertTokenStreamContents(ts,
@@ -72,7 +72,7 @@ public class TestPatternReplaceFilter ex
public void testReplaceAllWithBackRef() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("(a*)b"),
"$1\\$", true);
assertTokenStreamContents(ts,
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Tue May 17 20:16:40 2011
@@ -16,8 +16,8 @@ package org.apache.lucene.analysis.paylo
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
@@ -30,7 +30,7 @@ public class DelimitedPayloadTokenFilter
public void testPayloads() throws Exception {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
+ (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
@@ -51,7 +51,7 @@ public class DelimitedPayloadTokenFilter
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
+ (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
assertTermEquals("The", filter, null);
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
@@ -69,7 +69,7 @@ public class DelimitedPayloadTokenFilter
public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
- DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -87,7 +87,7 @@ public class DelimitedPayloadTokenFilter
public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
- DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -106,6 +106,7 @@ public class DelimitedPayloadTokenFilter
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
+ stream.reset();
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
Payload payload = payloadAtt.getPayload();
@@ -122,6 +123,7 @@ public class DelimitedPayloadTokenFilter
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+ stream.reset();
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
Payload payload = payAtt.getPayload();
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java Tue May 17 20:16:40 2011
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.paylo
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,11 +32,12 @@ public class NumericPayloadTokenFilterTe
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
- NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
+ NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)), 3, "D");
boolean seenDogs = false;
CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
+ nptf.reset();
while (nptf.incrementToken()) {
if (termAtt.toString().equals("dogs")) {
seenDogs = true;
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java Tue May 17 20:16:40 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.paylo
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
@@ -30,11 +30,11 @@ public class TokenOffsetPayloadTokenFilt
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
- TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+ TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
int count = 0;
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);
-
+ nptf.reset();
while (nptf.incrementToken()) {
Payload pay = payloadAtt.getPayload();
assertTrue("pay is null and it shouldn't be", pay != null);
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java Tue May 17 20:16:40 2011
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.paylo
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,12 +32,12 @@ public class TypeAsPayloadTokenFilterTes
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
- TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
+ TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
int count = 0;
CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
-
+ nptf.reset();
while (nptf.incrementToken()) {
assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java Tue May 17 20:16:40 2011
@@ -22,6 +22,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
@@ -50,7 +52,7 @@ public class QueryAutoStopWordAnalyzerTe
public void setUp() throws Exception {
super.setUp();
dir = new RAMDirectory();
- appAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+ appAnalyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, appAnalyzer));
int numDocs = 200;
for (int i = 0; i < numDocs; i++) {
@@ -159,9 +161,9 @@ public class QueryAutoStopWordAnalyzerTe
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
- return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
else
- return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
}
}
@@ -175,7 +177,7 @@ public class QueryAutoStopWordAnalyzerTe
}
public void testTokenStream() throws Exception {
- QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
a.addStopWords(reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
assertTokenStreamContents(ts, new String[] { "this" });
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Tue May 17 20:16:40 2011
@@ -19,22 +19,22 @@ package org.apache.lucene.analysis.rever
import java.io.StringReader;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testFilter() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("Do have a nice day")); // 1-4 length string
+ TokenStream stream = new MockTokenizer(new StringReader("Do have a nice day"),
+ MockTokenizer.WHITESPACE, false); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
}
public void testFilterWithMark() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "Do have a nice day")); // 1-4 length string
+ TokenStream stream = new MockTokenizer(new StringReader("Do have a nice day"),
+ MockTokenizer.WHITESPACE, false); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
assertTokenStreamContents(filter,
new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" });
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Tue May 17 20:16:40 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestRussianLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new RussianLightStemFilter(source));
}
};
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Tue May 17 20:16:40 2011
@@ -22,10 +22,9 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
@@ -106,7 +105,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"test sentence");
int[] ranks = new int[] { 1, 2, 0 };
compareRanks(hits, ranks);
@@ -117,7 +116,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"\"this sentence\"");
int[] ranks = new int[] { 0 };
compareRanks(hits, ranks);
@@ -128,7 +127,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"\"test sentence\"");
int[] ranks = new int[] { 1 };
compareRanks(hits, ranks);
@@ -139,7 +138,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"+test +sentence");
int[] ranks = new int[] { 1, 2 };
compareRanks(hits, ranks);
@@ -149,7 +148,7 @@ public class ShingleAnalyzerWrapperTest
* This shows how to construct a phrase query containing shingles.
*/
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
- Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+ Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
searcher = setUpSearcher(analyzer);
PhraseQuery q = new PhraseQuery();
@@ -161,6 +160,7 @@ public class ShingleAnalyzerWrapperTest
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
while (ts.incrementToken()) {
j += posIncrAtt.getPositionIncrement();
String termText = termAtt.toString();
@@ -178,7 +178,7 @@ public class ShingleAnalyzerWrapperTest
* in the right order and adjacent to each other.
*/
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
- Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+ Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
searcher = setUpSearcher(analyzer);
BooleanQuery q = new BooleanQuery();
@@ -188,6 +188,8 @@ public class ShingleAnalyzerWrapperTest
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
+
while (ts.incrementToken()) {
String termText = termAtt.toString();
q.add(new TermQuery(new Term("content", termText)),
@@ -200,7 +202,7 @@ public class ShingleAnalyzerWrapperTest
}
public void testReusableTokenStream() throws Exception {
- Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+ Analyzer a = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
assertAnalyzesToReuse(a, "please divide into shingles",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
@@ -222,9 +224,9 @@ public class ShingleAnalyzerWrapperTest
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
- return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
else
- return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
}
}
@@ -249,7 +251,7 @@ public class ShingleAnalyzerWrapperTest
public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 4);
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this", "please divide this sentence",
"divide", "divide this sentence", "divide this sentence into",
@@ -273,7 +275,7 @@ public class ShingleAnalyzerWrapperTest
public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 3);
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 3);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this",
"divide", "divide this sentence",
@@ -297,7 +299,7 @@ public class ShingleAnalyzerWrapperTest
public void testNoTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setTokenSeparator("");
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
@@ -319,7 +321,7 @@ public class ShingleAnalyzerWrapperTest
public void testNullTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setTokenSeparator(null);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
@@ -340,7 +342,7 @@ public class ShingleAnalyzerWrapperTest
}
public void testAltTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setTokenSeparator("<SEP>");
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "please<SEP>divide",
@@ -362,7 +364,7 @@ public class ShingleAnalyzerWrapperTest
public void testOutputUnigramsIfNoShinglesSingleToken() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setOutputUnigrams(false);
analyzer.setOutputUnigramsIfNoShingles(true);
assertAnalyzesToReuse(analyzer, "please",
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java Tue May 17 20:16:40 2011
@@ -22,14 +22,14 @@ import java.text.SimpleDateFormat;
import java.util.Locale;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
- TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+ TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
int count = 0;
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java Tue May 17 20:16:40 2011
@@ -84,7 +84,7 @@ public class TestTeeSinkTokenFilter exte
// with BaseTokenStreamTestCase now...
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
Directory dir = newDirectory();
- Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+ Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd ")));
@@ -108,7 +108,7 @@ public class TestTeeSinkTokenFilter exte
}
public void testGeneral() throws IOException {
- final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString())));
+ final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
final TokenStream sink1 = source.newSinkTokenStream();
final TokenStream sink2 = source.newSinkTokenStream(theFilter);
@@ -122,16 +122,17 @@ public class TestTeeSinkTokenFilter exte
}
public void testMultipleSources() throws Exception {
- final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString())));
+ final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
+ tee1.reset();
final TokenStream source1 = new CachingTokenFilter(tee1);
tee1.addAttribute(CheckClearAttributesAttribute.class);
dogDetector.addAttribute(CheckClearAttributesAttribute.class);
theDetector.addAttribute(CheckClearAttributesAttribute.class);
- final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer2.toString())));
+ final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false));
tee2.addSinkTokenStream(dogDetector);
tee2.addSinkTokenStream(theDetector);
final TokenStream source2 = tee2;
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Tue May 17 20:16:40 2011
@@ -20,14 +20,14 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
String test = "The quick red fox jumped over the lazy brown dogs";
- TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+ TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
int count = 0;
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Tue May 17 20:16:40 2011
@@ -20,9 +20,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,7 +32,7 @@ public class TokenTypeSinkTokenizerTest
TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
String test = "The quick red fox jumped over the lazy brown dogs";
- TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
+ TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
boolean seenDogs = false;
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Tue May 17 20:16:40 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSwedishLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SwedishLightStemFilter(source));
}
};
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java Tue May 17 20:16:40 2011
@@ -25,6 +25,7 @@ import java.util.Collection;
import java.util.List;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -43,14 +44,14 @@ public class TestSynonymFilter extends B
static void assertTokenizesTo(SynonymMap dict, String input,
String expected[]) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected);
}
static void assertTokenizesTo(SynonymMap dict, String input,
String expected[], int posIncs[]) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected, posIncs);
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java?rev=1104519&r1=1104518&r2=1104519&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java Tue May 17 20:16:40 2011
@@ -20,8 +20,8 @@ package org.apache.lucene.analysis.tr;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test the Turkish lowercase filter.
@@ -32,8 +32,8 @@ public class TestTurkishLowerCaseFilter
* Test composed forms
*/
public void testTurkishLowerCaseFilter() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "\u0130STANBUL \u0130ZM\u0130R ISPARTA"));
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "\u0130STANBUL \u0130ZM\u0130R ISPARTA"), MockTokenizer.WHITESPACE, false);
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
"\u0131sparta",});
@@ -43,8 +43,8 @@ public class TestTurkishLowerCaseFilter
* Test decomposed forms
*/
public void testDecomposed() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"));
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"), MockTokenizer.WHITESPACE, false);
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
"\u0131sparta",});
@@ -56,8 +56,8 @@ public class TestTurkishLowerCaseFilter
* to U+0130 + U+0316, and is lowercased the same way.
*/
public void testDecomposed2() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"));
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"), MockTokenizer.WHITESPACE, false);
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
"\u0131\u0316sparta",});