You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/01/09 14:27:36 UTC
svn commit: r1556801 [5/10] - in /lucene/dev/trunk: lucene/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/ luc...
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Thu Jan 9 13:27:29 2014
@@ -17,8 +17,6 @@
package org.apache.lucene.analysis.miscellaneous;
-import java.io.Reader;
-import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
@@ -41,12 +39,12 @@ public class TestKeepWordFilter extends
String input = "xxx yyy aaa zzz BBB ccc ddd EEE";
// Test Stopwords
- TokenStream stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(input);
stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
// Now force case
- stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+ stream = whitespaceMockTokenizer(input);
stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
}
@@ -60,8 +58,8 @@ public class TestKeepWordFilter extends
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream stream = new KeepWordFilter(TEST_VERSION_CURRENT, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
return new TokenStreamComponents(tokenizer, stream);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java Thu Jan 9 13:27:29 2014
@@ -43,16 +43,13 @@ public class TestKeywordMarkerFilter ext
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
"jumps" };
assertTokenStreamContents(new LowerCaseFilterMock(
- new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
- "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
+ new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), set)), output);
CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false);
assertTokenStreamContents(new LowerCaseFilterMock(
- new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
- "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), mixedCaseSet)), output);
+ new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), mixedCaseSet)), output);
CharArraySet set2 = set;
assertTokenStreamContents(new LowerCaseFilterMock(
- new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
- "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
+ new SetKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), set2)), output);
}
@Test
@@ -60,15 +57,13 @@ public class TestKeywordMarkerFilter ext
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
"jumps" };
assertTokenStreamContents(new LowerCaseFilterMock(
- new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader(
- "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[fF]ox"))), output);
+ new PatternKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), Pattern.compile("[a-zA-Z]+[fF]ox"))), output);
output = new String[] { "the", "quick", "brown", "lucenefox",
"jumps" };
assertTokenStreamContents(new LowerCaseFilterMock(
- new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader(
- "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[f]ox"))), output);
+ new PatternKeywordMarkerFilter(whitespaceMockTokenizer("The quIck browN LuceneFox Jumps"), Pattern.compile("[a-zA-Z]+[f]ox"))), output);
}
// LUCENE-2901
@@ -76,7 +71,7 @@ public class TestKeywordMarkerFilter ext
TokenStream ts = new LowerCaseFilterMock(
new SetKeywordMarkerFilter(
new SetKeywordMarkerFilter(
- new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+ whitespaceMockTokenizer("Dogs Trees Birds Houses"),
new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)),
new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
@@ -85,7 +80,7 @@ public class TestKeywordMarkerFilter ext
ts = new LowerCaseFilterMock(
new PatternKeywordMarkerFilter(
new PatternKeywordMarkerFilter(
- new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+ whitespaceMockTokenizer("Dogs Trees Birds Houses"),
Pattern.compile("Birds|Houses")),
Pattern.compile("Dogs|Trees")));
@@ -94,7 +89,7 @@ public class TestKeywordMarkerFilter ext
ts = new LowerCaseFilterMock(
new SetKeywordMarkerFilter(
new PatternKeywordMarkerFilter(
- new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+ whitespaceMockTokenizer("Dogs Trees Birds Houses"),
Pattern.compile("Birds|Houses")),
new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.misce
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
@@ -32,7 +31,7 @@ public class TestKeywordMarkerFilterFact
public void testKeywords() throws Exception {
Reader reader = new StringReader("dogs cats");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
new StringMockResourceLoader("cats"),
"protected", "protwords.txt").create(stream);
@@ -42,7 +41,7 @@ public class TestKeywordMarkerFilterFact
public void testKeywords2() throws Exception {
Reader reader = new StringReader("dogs cats");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("KeywordMarker",
"pattern", "cats|Dogs").create(stream);
stream = tokenFilterFactory("PorterStem").create(stream);
@@ -51,7 +50,7 @@ public class TestKeywordMarkerFilterFact
public void testKeywordsMixed() throws Exception {
Reader reader = new StringReader("dogs cats birds");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
new StringMockResourceLoader("cats"),
"protected", "protwords.txt",
@@ -62,7 +61,7 @@ public class TestKeywordMarkerFilterFact
public void testKeywordsCaseInsensitive() throws Exception {
Reader reader = new StringReader("dogs cats Cats");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
new StringMockResourceLoader("cats"),
"protected", "protwords.txt",
@@ -73,7 +72,7 @@ public class TestKeywordMarkerFilterFact
public void testKeywordsCaseInsensitive2() throws Exception {
Reader reader = new StringReader("dogs cats Cats");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("KeywordMarker",
"pattern", "Cats",
"ignoreCase", "true").create(stream);
@@ -83,7 +82,7 @@ public class TestKeywordMarkerFilterFact
public void testKeywordsCaseInsensitiveMixed() throws Exception {
Reader reader = new StringReader("dogs cats Cats Birds birds");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
new StringMockResourceLoader("cats"),
"protected", "protwords.txt",
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordRepeatFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordRepeatFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordRepeatFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordRepeatFilter.java Thu Jan 9 13:27:29 2014
@@ -18,25 +18,23 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import java.io.IOException;
-import java.io.StringReader;
public class TestKeywordRepeatFilter extends BaseTokenStreamTestCase {
public void testBasic() throws IOException {
TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(
- new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false)), "English"));
+ whitespaceMockTokenizer("the birds are flying")), "English"));
assertTokenStreamContents(ts, new String[] { "the", "birds", "bird", "are", "flying", "fli"}, new int[] {1,1,0,1,1,0});
}
public void testComposition() throws IOException {
TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new KeywordRepeatFilter(
- new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false))), "English"));
+ whitespaceMockTokenizer("the birds are flying"))), "English"));
assertTokenStreamContents(ts, new String[] { "the", "birds", "bird", "are", "flying", "fli"}, new int[] {1,1,0,1,1,0});
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Thu Jan 9 13:27:29 2014
@@ -31,8 +31,7 @@ import org.apache.lucene.analysis.core.K
public class TestLengthFilter extends BaseTokenStreamTestCase {
public void testFilterWithPosIncr() throws Exception {
- TokenStream stream = new MockTokenizer(
- new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer("short toolong evenmuchlongertext a ab toolong foo");
LengthFilter filter = new LengthFilter(TEST_VERSION_CURRENT, stream, 2, 6);
assertTokenStreamContents(filter,
new String[]{"short", "ab", "foo"},
@@ -43,8 +42,8 @@ public class TestLengthFilter extends Ba
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new LengthFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -21,13 +21,15 @@ import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
public class TestLengthFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testPositionIncrements() throws Exception {
Reader reader = new StringReader("foo foobar super-duper-trooper");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ ((Tokenizer)stream).setReader(reader);
stream = tokenFilterFactory("Length",
"min", "4",
"max", "10").create(stream);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -27,7 +27,8 @@ public class TestLimitTokenCountFilterFa
public void test() throws Exception {
Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ tokenizer.setReader(reader);
// LimitTokenCountFilter doesn't consume the entire stream that it wraps
tokenizer.setEnableChecks(false);
TokenStream stream = tokenizer;
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java Thu Jan 9 13:27:29 2014
@@ -34,8 +34,8 @@ public class TestLimitTokenPositionFilte
for (final boolean consumeAll : new boolean[] { true, false }) {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
// if we are consuming all tokens, we can use the checks, otherwise we can't
tokenizer.setEnableChecks(consumeAll);
return new TokenStreamComponents(tokenizer, new LimitTokenPositionFilter(tokenizer, 2, consumeAll));
@@ -59,7 +59,7 @@ public class TestLimitTokenPositionFilte
}
public void testMaxPosition3WithSynomyms() throws IOException {
- MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
+ MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
tokenizer.setEnableChecks(false); // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
SynonymMap.Builder builder = new SynonymMap.Builder(true);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -27,7 +27,7 @@ public class TestLimitTokenPositionFilte
public void testMaxPosition1() throws Exception {
Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
// LimitTokenPositionFilter doesn't consume the entire stream that it wraps
tokenizer.setEnableChecks(false);
TokenStream stream = tokenizer;
@@ -48,7 +48,7 @@ public class TestLimitTokenPositionFilte
public void testMaxPosition1WithShingles() throws Exception {
Reader reader = new StringReader("one two three four five");
- MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
// LimitTokenPositionFilter doesn't consume the entire stream that it wraps
tokenizer.setEnableChecks(false);
TokenStream stream = tokenizer;
@@ -63,7 +63,7 @@ public class TestLimitTokenPositionFilte
public void testConsumeAllTokens() throws Exception {
Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("LimitTokenPosition",
"maxTokenPosition", "3",
"consumeAllTokens", "true").create(stream);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java Thu Jan 9 13:27:29 2014
@@ -65,8 +65,8 @@ public class TestPerFieldAnalyzerWrapper
public void testCharFilters() throws Exception {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new MockTokenizer(reader));
+ protected TokenStreamComponents createComponents(String fieldName) {
+ return new TokenStreamComponents(new MockTokenizer());
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java Thu Jan 9 13:27:29 2014
@@ -28,9 +28,11 @@ public class TestPrefixAndSuffixAwareTok
public void test() throws IOException {
+ final MockTokenizer input = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ input.setReader(new StringReader("hello world"));
PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
new SingleTokenTokenStream(createToken("^", 0, 0)),
- new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false),
+ input,
new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java Thu Jan 9 13:27:29 2014
@@ -40,8 +40,10 @@ public class TestPrefixAwareTokenFilter
// prefix and suffix using 2x prefix
+ final MockTokenizer suffix = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ suffix.setReader(new StringReader("hello world"));
ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
- new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false));
+ suffix);
ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java Thu Jan 9 13:27:29 2014
@@ -156,8 +156,8 @@ public class TestRemoveDuplicatesTokenFi
final Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
}
@@ -170,8 +170,8 @@ public class TestRemoveDuplicatesTokenFi
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilter.java Thu Jan 9 13:27:29 2014
@@ -31,8 +31,8 @@ public class TestScandinavianFoldingFilt
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String field, Reader reader) {
- final Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String field) {
+ final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new ScandinavianFoldingFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
@@ -111,8 +111,8 @@ public class TestScandinavianFoldingFilt
public void testEmptyTerm() throws Exception {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new ScandinavianFoldingFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianFoldingFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -27,7 +27,7 @@ public class TestScandinavianFoldingFilt
public void testStemming() throws Exception {
Reader reader = new StringReader("räksmörgås");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("ScandinavianFolding").create(stream);
assertTokenStreamContents(stream, new String[] { "raksmorgas" });
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilter.java Thu Jan 9 13:27:29 2014
@@ -32,8 +32,8 @@ public class TestScandinavianNormalizati
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String field, Reader reader) {
- final Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String field) {
+ final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
final TokenStream stream = new ScandinavianNormalizationFilter(tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
@@ -110,8 +110,8 @@ public class TestScandinavianNormalizati
public void testEmptyTerm() throws Exception {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new ScandinavianNormalizationFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestScandinavianNormalizationFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -26,8 +26,7 @@ import java.io.StringReader;
public class TestScandinavianNormalizationFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
- Reader reader = new StringReader("räksmörgås");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer("räksmörgås");
stream = tokenFilterFactory("ScandinavianNormalization").create(stream);
assertTokenStreamContents(stream, new String[] { "ræksmørgås" });
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Thu Jan 9 13:27:29 2014
@@ -37,13 +37,21 @@ import org.apache.lucene.util._TestUtil;
*
*/
public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
+
+ private KeywordTokenizer keywordTokenizer(String data) throws IOException {
+ KeywordTokenizer tokenizer = new KeywordTokenizer();
+ tokenizer.setReader(new StringReader(data));
+ return tokenizer;
+ }
+
+
public void testOverride() throws IOException {
// lets make booked stem to books
// the override filter will convert "booked" to "books",
// but also mark it with KeywordAttribute so Porter will not change it.
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder();
builder.add("booked", "books");
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
+ Tokenizer tokenizer = keywordTokenizer("booked");
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
tokenizer, builder.build()));
assertTokenStreamContents(stream, new String[] {"books"});
@@ -55,7 +63,7 @@ public class TestStemmerOverrideFilter e
// but also mark it with KeywordAttribute so Porter will not change it.
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
builder.add("boOkEd", "books");
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader("BooKeD"));
+ Tokenizer tokenizer = keywordTokenizer("BooKeD");
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
tokenizer, builder.build()));
assertTokenStreamContents(stream, new String[] {"books"});
@@ -63,7 +71,7 @@ public class TestStemmerOverrideFilter e
public void testNoOverrides() throws IOException {
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
+ Tokenizer tokenizer = keywordTokenizer("book");
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
tokenizer, builder.build()));
assertTokenStreamContents(stream, new String[] {"book"});
@@ -105,8 +113,8 @@ public class TestStemmerOverrideFilter e
output.add(entry.getValue());
}
}
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input.toString()));
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
+ tokenizer.setReader(new StringReader(input.toString()));
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
tokenizer, builder.build()));
assertTokenStreamContents(stream, output.toArray(new String[0]));
@@ -135,8 +143,8 @@ public class TestStemmerOverrideFilter e
StemmerOverrideMap build = builder.build();
for (Entry<String,String> entry : entrySet) {
if (random().nextBoolean()) {
- Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
- entry.getKey()));
+ Tokenizer tokenizer = new KeywordTokenizer();
+ tokenizer.setReader(new StringReader(entry.getKey()));
TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
tokenizer, build));
assertTokenStreamContents(stream, new String[] {entry.getValue()});
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.misce
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
@@ -32,7 +31,7 @@ public class TestStemmerOverrideFilterFa
public void testKeywords() throws Exception {
// our stemdict stems dogs to 'cat'
Reader reader = new StringReader("testing dogs");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
new StringMockResourceLoader("dogs\tcat"),
"dictionary", "stemdict.txt").create(stream);
@@ -43,7 +42,7 @@ public class TestStemmerOverrideFilterFa
public void testKeywordsCaseInsensitive() throws Exception {
Reader reader = new StringReader("testing DoGs");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
new StringMockResourceLoader("dogs\tcat"),
"dictionary", "stemdict.txt",
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Thu Jan 9 13:27:29 2014
@@ -98,8 +98,8 @@ public class TestTrimFilter extends Base
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer));
}
};
@@ -109,8 +109,8 @@ public class TestTrimFilter extends Base
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
final Version version = TEST_VERSION_CURRENT;
return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer));
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.util.B
public class TestTrimFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testTrimming() throws Exception {
Reader reader = new StringReader("trim me ");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ TokenStream stream = keywordMockTokenizer(reader);
stream = tokenFilterFactory("Trim").create(stream);
assertTokenStreamContents(stream, new String[] { "trim me" });
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Thu Jan 9 13:27:29 2014
@@ -18,10 +18,8 @@
package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -29,8 +27,6 @@ import org.apache.lucene.analysis.util.C
import org.junit.Test;
import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
import java.util.*;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
@@ -130,8 +126,8 @@ public class TestWordDelimiterFilter ext
public void doSplit(final String input, String... output) throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;
- WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
- new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(keywordMockTokenizer(input),
+ WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null);
assertTokenStreamContents(wdf, output);
}
@@ -174,8 +170,7 @@ public class TestWordDelimiterFilter ext
public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS;
flags |= (stemPossessive == 1) ? STEM_ENGLISH_POSSESSIVE : 0;
- WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
- new StringReader(input), MockTokenizer.KEYWORD, false), flags, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(keywordMockTokenizer(input), flags, null);
assertTokenStreamContents(wdf, output);
}
@@ -220,8 +215,8 @@ public class TestWordDelimiterFilter ext
/* analyzer that uses whitespace + wdf */
Analyzer a = new Analyzer() {
@Override
- public TokenStreamComponents createComponents(String field, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ public TokenStreamComponents createComponents(String field) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
tokenizer,
flags, protWords));
@@ -257,8 +252,8 @@ public class TestWordDelimiterFilter ext
/* analyzer that will consume tokens with large position increments */
Analyzer a2 = new Analyzer() {
@Override
- public TokenStreamComponents createComponents(String field, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ public TokenStreamComponents createComponents(String field) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
new LargePosIncTokenFilter(tokenizer),
flags, protWords));
@@ -302,8 +297,8 @@ public class TestWordDelimiterFilter ext
Analyzer a3 = new Analyzer() {
@Override
- public TokenStreamComponents createComponents(String field, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ public TokenStreamComponents createComponents(String field) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
tokenizer, StandardAnalyzer.STOP_WORDS_SET);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords));
@@ -345,8 +340,8 @@ public class TestWordDelimiterFilter ext
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
}
};
@@ -367,8 +362,8 @@ public class TestWordDelimiterFilter ext
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Thu Jan 9 13:27:29 2014
@@ -46,7 +46,7 @@ public class EdgeNGramTokenFilterTest ex
@Override
public void setUp() throws Exception {
super.setUp();
- input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
+ input = whitespaceMockTokenizer("abcde");
}
public void testInvalidInput() throws Exception {
@@ -95,7 +95,7 @@ public class EdgeNGramTokenFilterTest ex
}
public void testFilterPositions() throws Exception {
- TokenStream ts = new MockTokenizer(new StringReader("abcde vwxyz"), MockTokenizer.WHITESPACE, false);
+ TokenStream ts = whitespaceMockTokenizer("abcde vwxyz");
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 1, 3);
assertTokenStreamContents(tokenizer,
new String[]{"a","ab","abc","v","vw","vwx"},
@@ -139,7 +139,7 @@ public class EdgeNGramTokenFilterTest ex
}
public void testFirstTokenPositionIncrement() throws Exception {
- TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
+ TokenStream ts = whitespaceMockTokenizer("a abc");
ts = new PositionFilter(ts); // All but first token will get 0 position increment
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 2, 3);
// The first token "a" will not be output, since it's smaller than the mingram size of 2.
@@ -154,13 +154,14 @@ public class EdgeNGramTokenFilterTest ex
}
public void testSmallTokenInStream() throws Exception {
- input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
+ input = whitespaceMockTokenizer("abc de fgh");
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {
- WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
+ tokenizer.setReader(new StringReader("abcde"));
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 3);
assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{5,5,5});
tokenizer.setReader(new StringReader("abcde"));
@@ -175,8 +176,8 @@ public class EdgeNGramTokenFilterTest ex
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer,
new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
}
@@ -189,8 +190,8 @@ public class EdgeNGramTokenFilterTest ex
Random random = random();
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer,
new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15));
}
@@ -199,7 +200,8 @@ public class EdgeNGramTokenFilterTest ex
}
public void testGraphs() throws IOException {
- TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
+ TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT);
+ ((Tokenizer)tk).setReader(new StringReader("abc d efgh ij klmno p q"));
tk = new ShingleFilter(tk);
tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
assertTokenStreamContents(tk,
@@ -217,7 +219,8 @@ public class EdgeNGramTokenFilterTest ex
final int codePointCount = s.codePointCount(0, s.length());
final int minGram = _TestUtil.nextInt(random(), 1, 3);
final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
- TokenStream tk = new KeywordTokenizer(new StringReader(s));
+ TokenStream tk = new KeywordTokenizer();
+ ((Tokenizer)tk).setReader(new StringReader(s));
tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerTest.java Thu Jan 9 13:27:29 2014
@@ -45,7 +45,7 @@ public class EdgeNGramTokenizerTest exte
public void testInvalidInput() throws Exception {
boolean gotException = false;
try {
- new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 0, 0);
+ new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 0, 0).setReader(input);
} catch (IllegalArgumentException e) {
gotException = true;
}
@@ -55,7 +55,7 @@ public class EdgeNGramTokenizerTest exte
public void testInvalidInput2() throws Exception {
boolean gotException = false;
try {
- new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
+ new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 2, 1).setReader(input);
} catch (IllegalArgumentException e) {
gotException = true;
}
@@ -65,7 +65,7 @@ public class EdgeNGramTokenizerTest exte
public void testInvalidInput3() throws Exception {
boolean gotException = false;
try {
- new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, -1, 2);
+ new EdgeNGramTokenizer(TEST_VERSION_CURRENT, -1, 2).setReader(input);
} catch (IllegalArgumentException e) {
gotException = true;
}
@@ -73,22 +73,26 @@ public class EdgeNGramTokenizerTest exte
}
public void testFrontUnigram() throws Exception {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 1);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[]{"a"}, new int[]{0}, new int[]{1}, 5 /* abcde */);
}
public void testOversizedNgrams() throws Exception {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 6, 6);
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 6, 6);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
}
public void testFrontRangeOfNgrams() throws Exception {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 3);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5 /* abcde */);
}
public void testReset() throws Exception {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 3);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5 /* abcde */);
tokenizer.setReader(new StringReader("abcde"));
assertTokenStreamContents(tokenizer, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3}, 5 /* abcde */);
@@ -102,8 +106,8 @@ public class EdgeNGramTokenizerTest exte
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, min, max);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
@@ -113,7 +117,8 @@ public class EdgeNGramTokenizerTest exte
}
public void testTokenizerPositions() throws Exception {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3);
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 1, 3);
+ tokenizer.setReader(new StringReader("abcde"));
assertTokenStreamContents(tokenizer,
new String[]{"a","ab","abc"},
new int[]{0,0,0},
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Thu Jan 9 13:27:29 2014
@@ -45,7 +45,7 @@ public class NGramTokenFilterTest extend
@Override
public void setUp() throws Exception {
super.setUp();
- input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
+ input = whitespaceMockTokenizer("abcde");
}
public void testInvalidInput() throws Exception {
@@ -108,13 +108,14 @@ public class NGramTokenFilterTest extend
}
public void testSmallTokenInStream() throws Exception {
- input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
+ input = whitespaceMockTokenizer("abc de fgh");
NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10}, new int[] {1, 2});
}
public void testReset() throws Exception {
- WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
+ tokenizer.setReader(new StringReader("abcde"));
NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
assertTokenStreamContents(filter, new String[]{"a","b","c","d","e"}, new int[]{0,0,0,0,0}, new int[]{5,5,5,5,5}, new int[]{1,0,0,0,0});
tokenizer.setReader(new StringReader("abcde"));
@@ -128,8 +129,8 @@ public class NGramTokenFilterTest extend
public void testInvalidOffsets() throws Exception {
Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
return new TokenStreamComponents(tokenizer, filters);
@@ -149,8 +150,8 @@ public class NGramTokenFilterTest extend
final int max = _TestUtil.nextInt(random(), min, 20);
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer,
new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
}
@@ -163,8 +164,8 @@ public class NGramTokenFilterTest extend
Random random = random();
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer,
new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15));
}
@@ -189,7 +190,8 @@ public class NGramTokenFilterTest extend
final int codePointCount = s.codePointCount(0, s.length());
final int minGram = _TestUtil.nextInt(random(), 1, 3);
final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
- TokenStream tk = new KeywordTokenizer(new StringReader(s));
+ TokenStream tk = new KeywordTokenizer();
+ ((Tokenizer)tk).setReader(new StringReader(s));
tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java Thu Jan 9 13:27:29 2014
@@ -51,8 +51,9 @@ public class NGramTokenizerTest extends
public void testInvalidInput() throws Exception {
boolean gotException = false;
- try {
- new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
+ try {
+ NGramTokenizer tok = new NGramTokenizer(TEST_VERSION_CURRENT, 2, 1);
+ tok.setReader(input);
} catch (IllegalArgumentException e) {
gotException = true;
}
@@ -61,8 +62,9 @@ public class NGramTokenizerTest extends
public void testInvalidInput2() throws Exception {
boolean gotException = false;
- try {
- new NGramTokenizer(TEST_VERSION_CURRENT, input, 0, 1);
+ try {
+ NGramTokenizer tok = new NGramTokenizer(TEST_VERSION_CURRENT, 0, 1);
+ tok.setReader(input);
} catch (IllegalArgumentException e) {
gotException = true;
}
@@ -70,17 +72,20 @@ public class NGramTokenizerTest extends
}
public void testUnigrams() throws Exception {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 1);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}, 5 /* abcde */);
}
public void testBigrams() throws Exception {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2);
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 2, 2);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[]{"ab","bc","cd","de"}, new int[]{0,1,2,3}, new int[]{2,3,4,5}, 5 /* abcde */);
}
public void testNgrams() throws Exception {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 3);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer,
new String[]{"a","ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e"},
new int[]{0,0,0,1,1,1,2,2,2,3,3,4},
@@ -94,12 +99,14 @@ public class NGramTokenizerTest extends
}
public void testOversizedNgrams() throws Exception {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7);
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 6, 7);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
}
public void testReset() throws Exception {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 1);
+ tokenizer.setReader(input);
assertTokenStreamContents(tokenizer, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}, 5 /* abcde */);
tokenizer.setReader(new StringReader("abcde"));
assertTokenStreamContents(tokenizer, new String[]{"a","b","c","d","e"}, new int[]{0,1,2,3,4}, new int[]{1,2,3,4,5}, 5 /* abcde */);
@@ -112,8 +119,8 @@ public class NGramTokenizerTest extends
final int max = _TestUtil.nextInt(random(), min, 20);
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, min, max);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
@@ -158,12 +165,13 @@ public class NGramTokenizerTest extends
for (int i = 0; i < codePoints.length; ++i) {
offsets[i+1] = offsets[i] + Character.charCount(codePoints[i]);
}
- final TokenStream grams = new NGramTokenizer(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly) {
+ final Tokenizer grams = new NGramTokenizer(TEST_VERSION_CURRENT, minGram, maxGram, edgesOnly) {
@Override
protected boolean isTokenChar(int chr) {
return nonTokenChars.indexOf(chr) < 0;
}
};
+ grams.setReader(new StringReader(s));
final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java Thu Jan 9 13:27:29 2014
@@ -20,8 +20,8 @@ package org.apache.lucene.analysis.ngram
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
@@ -33,9 +33,10 @@ public class TestNGramFilters extends Ba
*/
public void testNGramTokenizer() throws Exception {
Reader reader = new StringReader("test");
- TokenStream stream = tokenizerFactory("NGram").create(reader);
- assertTokenStreamContents(stream,
- new String[] { "t", "te", "e", "es", "s", "st", "t" });
+ TokenStream stream = tokenizerFactory("NGram").create();
+ ((Tokenizer)stream).setReader(reader);
+ assertTokenStreamContents(stream,
+ new String[]{"t", "te", "e", "es", "s", "st", "t"});
}
/**
@@ -45,7 +46,8 @@ public class TestNGramFilters extends Ba
Reader reader = new StringReader("test");
TokenStream stream = tokenizerFactory("NGram",
"minGramSize", "2",
- "maxGramSize", "3").create(reader);
+ "maxGramSize", "3").create();
+ ((Tokenizer)stream).setReader(reader);
assertTokenStreamContents(stream,
new String[] { "te", "tes", "es", "est", "st" });
}
@@ -55,7 +57,7 @@ public class TestNGramFilters extends Ba
*/
public void testNGramFilter() throws Exception {
Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("NGram").create(stream);
assertTokenStreamContents(stream,
new String[] { "t", "te", "e", "es", "s", "st", "t" });
@@ -66,7 +68,7 @@ public class TestNGramFilters extends Ba
*/
public void testNGramFilter2() throws Exception {
Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("NGram",
"minGramSize", "2",
"maxGramSize", "3").create(stream);
@@ -79,7 +81,8 @@ public class TestNGramFilters extends Ba
*/
public void testEdgeNGramTokenizer() throws Exception {
Reader reader = new StringReader("test");
- TokenStream stream = tokenizerFactory("EdgeNGram").create(reader);
+ TokenStream stream = tokenizerFactory("EdgeNGram").create();
+ ((Tokenizer)stream).setReader(reader);
assertTokenStreamContents(stream,
new String[] { "t" });
}
@@ -91,7 +94,8 @@ public class TestNGramFilters extends Ba
Reader reader = new StringReader("test");
TokenStream stream = tokenizerFactory("EdgeNGram",
"minGramSize", "1",
- "maxGramSize", "2").create(reader);
+ "maxGramSize", "2").create();
+ ((Tokenizer)stream).setReader(reader);
assertTokenStreamContents(stream,
new String[] { "t", "te" });
}
@@ -101,7 +105,7 @@ public class TestNGramFilters extends Ba
*/
public void testEdgeNGramFilter() throws Exception {
Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("EdgeNGram").create(stream);
assertTokenStreamContents(stream,
new String[] { "t" });
@@ -112,7 +116,7 @@ public class TestNGramFilters extends Ba
*/
public void testEdgeNGramFilter2() throws Exception {
Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("EdgeNGram",
"minGramSize", "1",
"maxGramSize", "2").create(stream);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java Thu Jan 9 13:27:29 2014
@@ -42,9 +42,8 @@ import static org.apache.lucene.analysis
public class TestNorwegianLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, BOKMAAL));
}
};
@@ -58,8 +57,8 @@ public class TestNorwegianLightStemFilte
public void testNynorskVocabulary() throws IOException {
Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, NYNORSK));
}
};
@@ -70,8 +69,8 @@ public class TestNorwegianLightStemFilte
final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink));
}
@@ -88,8 +87,8 @@ public class TestNorwegianLightStemFilte
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new NorwegianLightStemFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.util.B
public class TestNorwegianLightStemFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
Reader reader = new StringReader("epler eple");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("NorwegianLightStem").create(stream);
assertTokenStreamContents(stream, new String[] { "epl", "epl" });
}
@@ -38,7 +38,7 @@ public class TestNorwegianLightStemFilte
/** Test stemming with variant set explicitly to Bokmål */
public void testBokmaalStemming() throws Exception {
Reader reader = new StringReader("epler eple");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("NorwegianLightStem", "variant", "nb").create(stream);
assertTokenStreamContents(stream, new String[] { "epl", "epl" });
}
@@ -46,7 +46,7 @@ public class TestNorwegianLightStemFilte
/** Test stemming with variant set explicitly to Nynorsk */
public void testNynorskStemming() throws Exception {
Reader reader = new StringReader("gutar gutane");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = tokenFilterFactory("NorwegianLightStem", "variant", "nn").create(stream);
assertTokenStreamContents(stream, new String[] { "gut", "gut" });
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java Thu Jan 9 13:27:29 2014
@@ -41,9 +41,8 @@ import static org.apache.lucene.analysis
public class TestNorwegianMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(source, BOKMAAL));
}
};
@@ -57,8 +56,8 @@ public class TestNorwegianMinimalStemFil
public void testNynorskVocabulary() throws IOException {
Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(source, NYNORSK));
}
};
@@ -69,8 +68,8 @@ public class TestNorwegianMinimalStemFil
final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sekretæren"), false);
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(sink));
}
@@ -87,8 +86,8 @@ public class TestNorwegianMinimalStemFil
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new NorwegianMinimalStemFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
@@ -30,7 +31,8 @@ import org.apache.lucene.analysis.util.B
public class TestNorwegianMinimalStemFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
Reader reader = new StringReader("eple eplet epler eplene eplets eplenes");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ ((Tokenizer)stream).setReader(reader);
stream = tokenFilterFactory("NorwegianMinimalStem").create(stream);
assertTokenStreamContents(stream, new String[] { "epl", "epl", "epl", "epl", "epl", "epl" });
}
@@ -38,7 +40,8 @@ public class TestNorwegianMinimalStemFil
/** Test stemming with variant set explicitly to Bokmål */
public void testBokmaalStemming() throws Exception {
Reader reader = new StringReader("eple eplet epler eplene eplets eplenes");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ ((Tokenizer)stream).setReader(reader);
stream = tokenFilterFactory("NorwegianMinimalStem", "variant", "nb").create(stream);
assertTokenStreamContents(stream, new String[] { "epl", "epl", "epl", "epl", "epl", "epl" });
}
@@ -46,7 +49,8 @@ public class TestNorwegianMinimalStemFil
/** Test stemming with variant set explicitly to Nynorsk */
public void testNynorskStemming() throws Exception {
Reader reader = new StringReader("gut guten gutar gutane gutens gutanes");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ ((Tokenizer)stream).setReader(reader);
stream = tokenFilterFactory("NorwegianMinimalStem", "variant", "nn").create(stream);
assertTokenStreamContents(stream, new String[] { "gut", "gut", "gut", "gut", "gut", "gut" });
}