You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2011/05/22 23:45:45 UTC
svn commit: r1126234 [20/28] - in /lucene/dev/branches/solr2452: ./
dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contri...
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Sun May 22 21:45:19 2011
@@ -21,6 +21,7 @@ import java.io.StringReader;
import org.xml.sax.InputSource;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -35,8 +36,8 @@ public class TestCompoundWordTokenFilter
.getHyphenationTree(is);
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "min veninde som er lidt af en læsehest")), hyphenator,
+ new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false),
+ hyphenator,
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
@@ -55,8 +56,8 @@ public class TestCompoundWordTokenFilter
// the word basket will not be added due to the longest match option
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "basketballkurv")), hyphenator, dict,
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
+ hyphenator, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
assertTokenStreamContents(tf,
@@ -77,7 +78,7 @@ public class TestCompoundWordTokenFilter
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
2, 4);
@@ -89,7 +90,7 @@ public class TestCompoundWordTokenFilter
tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 6);
@@ -101,7 +102,7 @@ public class TestCompoundWordTokenFilter
tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 10);
@@ -120,9 +121,10 @@ public class TestCompoundWordTokenFilter
"Sko", "Vind", "Rute", "Torkare", "Blad" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+ new MockTokenizer(
new StringReader(
- "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
+ "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"),
+ MockTokenizer.WHITESPACE, false),
dict);
assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor",
@@ -149,7 +151,7 @@ public class TestCompoundWordTokenFilter
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
+ new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Sun May 22 21:45:19 2011
@@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseT
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
final class PayloadSetter extends TokenFilter {
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Sun May 22 21:45:19 2011
@@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends
dir.close();
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Sun May 22 21:45:19 2011
@@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends
assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset());
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Sun May 22 21:45:19 2011
@@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
@@ -219,4 +220,9 @@ public class TestStandardAnalyzer extend
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java Sun May 22 21:45:19 2011
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -36,36 +37,23 @@ public class TestStopFilter extends Base
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet("is", "the", "Time");
- TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
- final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.toString());
- assertTrue(stream.incrementToken());
- assertEquals("The", termAtt.toString());
- assertFalse(stream.incrementToken());
+ TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
+ assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet( "is", "the", "Time" );
- TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
- final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.toString());
- assertFalse(stream.incrementToken());
+ TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
+ assertTokenStreamContents(stream, new String[] { "Now" });
}
public void testStopFilt() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
- TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
- final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.toString());
- assertTrue(stream.incrementToken());
- assertEquals("The", termAtt.toString());
- assertFalse(stream.incrementToken());
+ TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+ assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
/**
@@ -85,11 +73,11 @@ public class TestStopFilter extends Base
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
- StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+ StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,true);
// without increments
reader = new StringReader(sb.toString());
- stpf = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+ stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,false);
// with increments, concatenating two stop filters
ArrayList<String> a0 = new ArrayList<String>();
@@ -108,7 +96,7 @@ public class TestStopFilter extends Base
Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
reader = new StringReader(sb.toString());
- StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet0); // first part of the set
+ StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true);
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01,true);
@@ -119,6 +107,7 @@ public class TestStopFilter extends Base
stpf.setEnablePositionIncrements(enableIcrements);
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
+ stpf.reset();
for (int i=0; i<20; i+=3) {
assertTrue(stpf.incrementToken());
log("Token "+i+": "+stpf);
@@ -127,6 +116,8 @@ public class TestStopFilter extends Base
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
}
assertFalse(stpf.incrementToken());
+ stpf.end();
+ stpf.close();
}
// print debug info depending on VERBOSE
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Sun May 22 21:45:19 2011
@@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Sun May 22 21:45:19 2011
@@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends B
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java Sun May 22 21:45:19 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -278,7 +278,7 @@ public class TestCzechStemmer extends Ba
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
+ new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Sun May 22 21:45:19 2011
@@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestGermanLightStemFilter e
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanLightStemFilter(source));
}
};
@@ -45,4 +45,9 @@ public class TestGermanLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestGermanMinimalStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
}
};
@@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Sun May 22 21:45:19 2011
@@ -36,20 +36,30 @@ import static org.apache.lucene.analysis
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t,
+ new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+ }
+ };
- public void testStemming() throws Exception {
- Analyzer analyzer = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t,
- new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
- }
- };
-
+ public void testStemming() throws Exception {
InputStream vocOut = getClass().getResourceAsStream("data.txt");
assertVocabulary(analyzer, vocOut);
vocOut.close();
}
+
+ // LUCENE-3043: we use keywordtokenizer in this test,
+ // so ensure the stemmer does not crash on zero-length strings.
+ public void testEmpty() throws Exception {
+ assertAnalyzesTo(analyzer, "", new String[] { "" });
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Sun May 22 21:45:19 2011
@@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends B
assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
}
- }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Sun May 22 21:45:19 2011
@@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
/**
@@ -34,7 +34,7 @@ public class TestEnglishMinimalStemFilte
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
}
};
@@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilte
checkOneTerm(analyzer, "congress", "congress");
checkOneTerm(analyzer, "serious", "serious");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java Sun May 22 21:45:19 2011
@@ -22,12 +22,11 @@ import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -36,29 +35,34 @@ import static org.apache.lucene.analysis
/**
* Test the PorterStemFilter with Martin Porter's test data.
*/
-public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ return new TokenStreamComponents(t, new PorterStemFilter(t));
+ }
+ };
+
/**
* Run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
- Analyzer a = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t, new PorterStemFilter(t));
- }
- };
-
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
}
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("yourselves");
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSpanishLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
}
};
@@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends
checkOneTermReuse(a, "zaldiak", "zaldiak");
checkOneTermReuse(a, "mendiari", "mendi");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Sun May 22 21:45:19 2011
@@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFinnishLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
}
};
@@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Sun May 22 21:45:19 2011
@@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Votre", new String[] { });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchLightStemFilter e
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
}
};
@@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchMinimalStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
}
};
@@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extend
checkOneTermReuse(a, "correspondente", "correspondente");
checkOneTermReuse(a, "corresponderá", "correspond");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java Sun May 22 21:45:19 2011
@@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends B
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java Sun May 22 21:45:19 2011
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test HindiNormalizer
@@ -59,8 +59,7 @@ public class TestHindiNormalizer extends
check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
}
private void check(String input, String output) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter tf = new HindiNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java Sun May 22 21:45:19 2011
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test HindiStemmer
@@ -81,8 +81,7 @@ public class TestHindiStemmer extends Ba
}
private void check(String input, String output) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter tf = new HindiStemFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestHungarianAnalyzer exten
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestHungarianLightStemFilte
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
}
};
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extend
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
checkOneTermReuse(a, "արծիվ", "արծ");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer exte
checkOneTermReuse(a, "peledakan", "peledakan");
checkOneTermReuse(a, "pembunuhan", "bunuh");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new IndonesianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java Sun May 22 21:45:19 2011
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test IndicNormalizer
@@ -44,8 +44,7 @@ public class TestIndicNormalizer extends
}
private void check(String input, String output) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);;
TokenFilter tf = new IndicNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Sun May 22 21:45:19 2011
@@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -50,4 +51,23 @@ public class TestItalianAnalyzer extends
checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+
+ /** test that the elisionfilter is working */
+ public void testContractions() throws IOException {
+ Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(a, "dell'Italia", new String[] { "ital" });
+ assertAnalyzesTo(a, "l'Italiano", new String[] { "ital" });
+ }
+
+ /** test that we don't enable this before 3.2*/
+ public void testContractionsBackwards() throws IOException {
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "dell'Italia", new String[] { "dell'ital" });
+ assertAnalyzesTo(a, "l'Italiano", new String[] { "l'ital" });
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestItalianLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
}
};
@@ -45,4 +45,9 @@ public class TestItalianLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java Sun May 22 21:45:19 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -30,14 +31,14 @@ public class TestASCIIFoldingFilter exte
// testLain1Accents() is a copy of TestLatin1AccentFilter.testU().
public void testLatin1Accents() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader
+ TokenStream stream = new MockTokenizer(new StringReader
("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ĳ Ð Ñ"
+" Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ĳ"
- +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ ﬁ ﬂ"));
+ +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ ﬁ ﬂ"), MockTokenizer.WHITESPACE, false);
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
-
+ filter.reset();
assertTermEquals("Des", filter, termAtt);
assertTermEquals("mot", filter, termAtt);
assertTermEquals("cles", filter, termAtt);
@@ -1891,10 +1892,11 @@ public class TestASCIIFoldingFilter exte
expectedOutputTokens.add(expected.toString());
}
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(inputText.toString()));
+ TokenStream stream = new MockTokenizer(new StringReader(inputText.toString()), MockTokenizer.WHITESPACE, false);
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
Iterator<String> expectedIter = expectedOutputTokens.iterator();
+ filter.reset();
while (expectedIter.hasNext()) {
assertTermEquals(expectedIter.next(), filter, termAtt);
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java Sun May 22 21:45:19 2011
@@ -25,9 +25,8 @@ import java.util.Collection;
import java.util.List;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
@@ -105,7 +104,7 @@ public class TestCapitalizationFilter ex
boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
int maxTokenLength) throws IOException {
- assertCapitalizesTo(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+ assertCapitalizesTo(new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
expected, onlyFirstWord, keep, forceFirstLetter, okPrefix, minWordLength,
maxWordCount, maxTokenLength);
}
@@ -114,7 +113,7 @@ public class TestCapitalizationFilter ex
boolean onlyFirstWord, CharArraySet keep, boolean forceFirstLetter,
Collection<char[]> okPrefix, int minWordLength, int maxWordCount,
int maxTokenLength) throws IOException {
- assertCapitalizesTo(new KeywordTokenizer(new StringReader(input)),
+ assertCapitalizesTo(new MockTokenizer(new StringReader(input), MockTokenizer.KEYWORD, false),
new String[] { expected }, onlyFirstWord, keep, forceFirstLetter, okPrefix,
minWordLength, maxWordCount, maxTokenLength);
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java Sun May 22 21:45:19 2011
@@ -20,8 +20,8 @@ package org.apache.lucene.analysis.misce
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* HyphenatedWordsFilter test
@@ -30,7 +30,7 @@ public class TestHyphenatedWordsFilter e
public void testHyphenatedWords() throws Exception {
String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecologi-\ncal";
// first test
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
ts = new HyphenatedWordsFilter(ts);
assertTokenStreamContents(ts,
new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecological" });
@@ -42,7 +42,7 @@ public class TestHyphenatedWordsFilter e
public void testHyphenAtEnd() throws Exception {
String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecology-";
// first test
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
ts = new HyphenatedWordsFilter(ts);
assertTokenStreamContents(ts,
new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecology-" });
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
/** Test {@link KeepWordFilter} */
@@ -38,22 +38,22 @@ public class TestKeepWordFilter extends
String input = "xxx yyy aaa zzz BBB ccc ddd EEE";
// Test Stopwords
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ TokenStream stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
// Now force case
- stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
// Test Stopwords
- stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
// Now force case
- stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
}
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java Sun May 22 21:45:19 2011
@@ -8,9 +8,9 @@ import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -45,17 +45,17 @@ public class TestKeywordMarkerFilter ext
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
"jumps" };
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "The quIck browN LuceneFox Jumps")), set)), output);
+ new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
Set<String> jdkSet = new HashSet<String>();
jdkSet.add("LuceneFox");
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "The quIck browN LuceneFox Jumps")), jdkSet)), output);
+ new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), jdkSet)), output);
Set<?> set2 = set;
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "The quIck browN LuceneFox Jumps")), set2)), output);
+ new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
}
// LUCENE-2901
@@ -63,8 +63,7 @@ public class TestKeywordMarkerFilter ext
TokenStream ts = new LowerCaseFilterMock(
new KeywordMarkerFilter(
new KeywordMarkerFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("Dogs Trees Birds Houses")),
+ new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))),
new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Sun May 22 21:45:19 2011
@@ -18,15 +18,13 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
public class TestLengthFilter extends BaseTokenStreamTestCase {
public void testFilterNoPosIncr() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
+ TokenStream stream = new MockTokenizer(
+ new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
LengthFilter filter = new LengthFilter(false, stream, 2, 6);
assertTokenStreamContents(filter,
new String[]{"short", "ab", "foo"},
@@ -35,8 +33,8 @@ public class TestLengthFilter extends Ba
}
public void testFilterWithPosIncr() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
+ TokenStream stream = new MockTokenizer(
+ new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
LengthFilter filter = new LengthFilter(true, stream, 2, 6);
assertTokenStreamContents(filter,
new String[]{"short", "ab", "foo"},
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java Sun May 22 21:45:19 2011
@@ -51,7 +51,7 @@ public class TestLimitTokenCountAnalyzer
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000)));
+ TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(random), 100000)));
Document doc = new Document();
StringBuilder b = new StringBuilder();
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java Sun May 22 21:45:19 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -30,7 +30,7 @@ public class TestPrefixAndSuffixAwareTok
PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
new SingleTokenTokenStream(createToken("^", 0, 0)),
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")),
+ new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false),
new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java Sun May 22 21:45:19 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -41,7 +41,7 @@ public class TestPrefixAwareTokenFilter
// prefix and suffix using 2x prefix
ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")));
+ new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false));
ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Sun May 22 21:45:19 2011
@@ -19,12 +19,11 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -127,8 +126,8 @@ public class TestWordDelimiterFilter ext
}
public void doSplit(final String input, String... output) throws Exception {
- WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
+ new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf, output);
}
@@ -169,8 +168,8 @@ public class TestWordDelimiterFilter ext
}
public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
- WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), 1,1,0,0,0,1,0,1,stemPossessive, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
+ new StringReader(input), MockTokenizer.KEYWORD, false), 1,1,0,0,0,1,0,1,stemPossessive, null);
assertTokenStreamContents(wdf, output);
}
@@ -216,7 +215,7 @@ public class TestWordDelimiterFilter ext
@Override
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
}
};
@@ -244,7 +243,7 @@ public class TestWordDelimiterFilter ext
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
new LargePosIncTokenFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)),
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
}
};
@@ -276,7 +275,7 @@ public class TestWordDelimiterFilter ext
@Override
public TokenStream tokenStream(String field, Reader reader) {
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), StandardAnalyzer.STOP_WORDS_SET);
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
filter.setEnablePositionIncrements(true);
return new WordDelimiterFilter(filter,
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);