You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/18 14:58:55 UTC
svn commit: r1124242 [1/2] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/ lucene/contrib/ lucene/contrib/analyzers/common/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/
lucene/contrib/analyzers/co...
Author: rmuir
Date: Wed May 18 12:58:53 2011
New Revision: 1124242
URL: http://svn.apache.org/viewvc?rev=1124242&view=rev
Log:
LUCENE-3113: fix analyzer bugs found by MockTokenizer
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
lucene/dev/branches/branch_3x/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Wed May 18 12:58:53 2011
@@ -27,6 +27,12 @@ Bug Fixes
caused a problem if you consumed a tokenstream, then reused it, added different
attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
+ * LUCENE-3113: Fixed some minor analysis bugs: double-reset() in ReusableAnalyzerBase
+ and ShingleAnalyzerWrapper, missing end() implementations in PrefixAwareTokenFilter
+ and PrefixAndSuffixAwareTokenFilter, invocations of incrementToken() after it
+ already returned false in CommonGramsQueryFilter, HyphenatedWordsFilter,
+ ShingleFilter, and SynonymFilter. (Robert Muir, Steven Rowe, Uwe Schindler)
+
New Features
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java Wed May 18 12:58:53 2011
@@ -76,4 +76,9 @@ public class PrefixAndSuffixAwareTokenFi
public void close() throws IOException {
suffix.close();
}
+
+ @Override
+ public void end() throws IOException {
+ suffix.end();
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java Wed May 18 12:58:53 2011
@@ -159,6 +159,12 @@ public class PrefixAwareTokenFilter exte
}
@Override
+ public void end() throws IOException {
+ prefix.end();
+ suffix.end();
+ }
+
+ @Override
public void close() throws IOException {
prefix.close();
suffix.close();
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java Wed May 18 12:58:53 2011
@@ -228,7 +228,6 @@ public final class QueryAutoStopWordAnal
TokenStream result = delegate.reusableTokenStream(fieldName, reader);
if (result == streams.wrapped) {
/* the wrapped analyzer reused the stream */
- streams.withStopFilter.reset();
} else {
/*
* the wrapped analyzer did not. if there are any stopwords for the
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java Wed May 18 12:58:53 2011
@@ -199,10 +199,7 @@ public final class ShingleAnalyzerWrappe
setPreviousTokenStream(streams);
} else {
TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
- if (result == streams.wrapped) {
- /* the wrapped analyzer reused the stream */
- streams.shingle.reset();
- } else {
+ if (result != streams.wrapped) {
/* the wrapped analyzer did not, create a new shingle around the new one */
streams.wrapped = result;
streams.shingle = new ShingleFilter(streams.wrapped);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java Wed May 18 12:58:53 2011
@@ -327,6 +327,8 @@ public final class ShingleFilter extends
return tokenAvailable;
}
+ private boolean exhausted;
+
/**
* <p>Get the next token from the input stream.
* <p>If the next token has <code>positionIncrement > 1</code>,
@@ -359,7 +361,7 @@ public final class ShingleFilter extends
}
isNextInputStreamToken = false;
newTarget.isFiller = false;
- } else if (input.incrementToken()) {
+ } else if (!exhausted && input.incrementToken()) {
if (null == target) {
newTarget = new InputWindowToken(cloneAttributes());
} else {
@@ -387,6 +389,7 @@ public final class ShingleFilter extends
}
} else {
newTarget = null;
+ exhausted = true;
}
return newTarget;
}
@@ -435,7 +438,8 @@ public final class ShingleFilter extends
inputWindow.clear();
numFillerTokensToInsert = 0;
isOutputHere = false;
- noShingleOutput = true;
+ noShingleOutput = true;
+ exhausted = false;
if (outputUnigramsIfNoShingles && ! outputUnigrams) {
// Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
gramSize.minValue = minShingleSize;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java Wed May 18 12:58:53 2011
@@ -23,7 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerFilter;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.util.Version;
/**
@@ -215,8 +215,7 @@ public class TestBulgarianStemmer extend
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве");
- WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("строевете строеве"));
+ MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
BulgarianStemFilter filter = new BulgarianStemFilter(
new KeywordMarkerFilter(tokenStream, set));
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Wed May 18 12:58:53 2011
@@ -21,6 +21,7 @@ import java.io.StringReader;
import org.xml.sax.InputSource;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
@@ -35,8 +36,8 @@ public class TestCompoundWordTokenFilter
.getHyphenationTree(is);
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "min veninde som er lidt af en læsehest")), hyphenator,
+ new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false),
+ hyphenator,
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
@@ -55,8 +56,8 @@ public class TestCompoundWordTokenFilter
// the word basket will not be added due to the longest match option
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "basketballkurv")), hyphenator, dict,
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
+ hyphenator, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
assertTokenStreamContents(tf,
@@ -77,7 +78,7 @@ public class TestCompoundWordTokenFilter
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
2, 4);
@@ -89,7 +90,7 @@ public class TestCompoundWordTokenFilter
tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 6);
@@ -101,7 +102,7 @@ public class TestCompoundWordTokenFilter
tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+ new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 10);
@@ -120,9 +121,10 @@ public class TestCompoundWordTokenFilter
"Sko", "Vind", "Rute", "Torkare", "Blad" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+ new MockTokenizer(
new StringReader(
- "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
+ "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"),
+ MockTokenizer.WHITESPACE, false),
dict);
assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor",
@@ -149,7 +151,7 @@ public class TestCompoundWordTokenFilter
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
+ new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java Wed May 18 12:58:53 2011
@@ -23,7 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerFilter;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
/**
* Test the Czech Stemmer.
@@ -278,7 +278,7 @@ public class TestCzechStemmer extends Ba
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
+ new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestGermanLightStemFilter e
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestGermanMinimalStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
@@ -34,7 +34,7 @@ public class TestEnglishMinimalStemFilte
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSpanishLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFinnishLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchLightStemFilter e
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchMinimalStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java Wed May 18 12:58:53 2011
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Test HindiNormalizer
@@ -59,8 +59,7 @@ public class TestHindiNormalizer extends
check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
}
private void check(String input, String output) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter tf = new HindiNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java Wed May 18 12:58:53 2011
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Test HindiStemmer
@@ -81,8 +81,7 @@ public class TestHindiStemmer extends Ba
}
private void check(String input, String output) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter tf = new HindiStemFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestHungarianLightStemFilte
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java Wed May 18 12:58:53 2011
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Test IndicNormalizer
@@ -44,8 +44,7 @@ public class TestIndicNormalizer extends
}
private void check(String input, String output) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);;
TokenFilter tf = new IndicNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestItalianLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemmer.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
@@ -33,7 +33,7 @@ public class TestLatvianStemmer extends
private Analyzer a = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java Wed May 18 12:58:53 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -30,7 +30,7 @@ public class TestPrefixAndSuffixAwareTok
PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
new SingleTokenTokenStream(createToken("^", 0, 0)),
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")),
+ new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false),
new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java Wed May 18 12:58:53 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.misce
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -41,7 +41,7 @@ public class TestPrefixAwareTokenFilter
// prefix and suffix using 2x prefix
ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hello world")));
+ new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false));
ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Wed May 18 12:58:53 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ngram
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -32,7 +33,7 @@ public class EdgeNGramTokenFilterTest ex
@Override
public void setUp() throws Exception {
super.setUp();
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
}
public void testInvalidInput() throws Exception {
@@ -91,7 +92,7 @@ public class EdgeNGramTokenFilterTest ex
}
public void testSmallTokenInStream() throws Exception {
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
+ input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Wed May 18 12:58:53 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ngram
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -32,7 +33,7 @@ public class NGramTokenFilterTest extend
@Override
public void setUp() throws Exception {
super.setUp();
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
}
public void testInvalidInput() throws Exception {
@@ -80,7 +81,7 @@ public class NGramTokenFilterTest extend
}
public void testSmallTokenInStream() throws Exception {
- input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
+ input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Wed May 18 12:58:53 2011
@@ -16,8 +16,8 @@ package org.apache.lucene.analysis.paylo
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
@@ -30,7 +30,7 @@ public class DelimitedPayloadTokenFilter
public void testPayloads() throws Exception {
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
+ (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
@@ -51,7 +51,7 @@ public class DelimitedPayloadTokenFilter
String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
- (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
+ (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
assertTermEquals("The", filter, null);
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
@@ -69,7 +69,7 @@ public class DelimitedPayloadTokenFilter
public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
- DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -87,7 +87,7 @@ public class DelimitedPayloadTokenFilter
public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
- DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -106,6 +106,7 @@ public class DelimitedPayloadTokenFilter
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
+ stream.reset();
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
Payload payload = payloadAtt.getPayload();
@@ -122,6 +123,7 @@ public class DelimitedPayloadTokenFilter
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+ stream.reset();
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
Payload payload = payAtt.getPayload();
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java Wed May 18 12:58:53 2011
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.paylo
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,11 +32,12 @@ public class NumericPayloadTokenFilterTe
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
- NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
+ NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)), 3, "D");
boolean seenDogs = false;
CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
+ nptf.reset();
while (nptf.incrementToken()) {
if (termAtt.toString().equals("dogs")) {
seenDogs = true;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java Wed May 18 12:58:53 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.paylo
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
@@ -30,11 +30,11 @@ public class TokenOffsetPayloadTokenFilt
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
- TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+ TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
int count = 0;
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);
-
+ nptf.reset();
while (nptf.incrementToken()) {
Payload pay = payloadAtt.getPayload();
assertTrue("pay is null and it shouldn't be", pay != null);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java Wed May 18 12:58:53 2011
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.paylo
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,12 +32,12 @@ public class TypeAsPayloadTokenFilterTes
public void test() throws IOException {
String test = "The quick red fox jumped over the lazy brown dogs";
- TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
+ TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
int count = 0;
CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
-
+ nptf.reset();
while (nptf.incrementToken()) {
assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java Wed May 18 12:58:53 2011
@@ -22,10 +22,9 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.LetterTokenizer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -50,7 +49,7 @@ public class QueryAutoStopWordAnalyzerTe
public void setUp() throws Exception {
super.setUp();
dir = new RAMDirectory();
- appAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+ appAnalyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, appAnalyzer));
int numDocs = 200;
for (int i = 0; i < numDocs; i++) {
@@ -159,9 +158,9 @@ public class QueryAutoStopWordAnalyzerTe
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
- return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
else
- return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
}
}
@@ -175,7 +174,7 @@ public class QueryAutoStopWordAnalyzerTe
}
public void testTokenStream() throws Exception {
- QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
a.addStopWords(reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
assertTokenStreamContents(ts, new String[] { "this" });
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Wed May 18 12:58:53 2011
@@ -19,21 +19,21 @@ package org.apache.lucene.analysis.rever
import java.io.StringReader;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testFilter() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("Do have a nice day")); // 1-4 length string
+ TokenStream stream = new MockTokenizer(new StringReader("Do have a nice day"),
+ MockTokenizer.WHITESPACE, false); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
}
public void testFilterWithMark() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "Do have a nice day")); // 1-4 length string
+ TokenStream stream = new MockTokenizer(new StringReader("Do have a nice day"),
+ MockTokenizer.WHITESPACE, false); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
assertTokenStreamContents(filter,
new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" });
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestRussianLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new RussianLightStemFilter(source));
}
};
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Wed May 18 12:58:53 2011
@@ -22,10 +22,9 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.LetterTokenizer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
@@ -106,7 +105,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"test sentence");
int[] ranks = new int[] { 1, 2, 0 };
compareRanks(hits, ranks);
@@ -117,7 +116,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"\"this sentence\"");
int[] ranks = new int[] { 0 };
compareRanks(hits, ranks);
@@ -128,7 +127,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"\"test sentence\"");
int[] ranks = new int[] { 1 };
compareRanks(hits, ranks);
@@ -139,7 +138,7 @@ public class ShingleAnalyzerWrapperTest
*/
public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
- (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+ (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
"+test +sentence");
int[] ranks = new int[] { 1, 2 };
compareRanks(hits, ranks);
@@ -149,7 +148,7 @@ public class ShingleAnalyzerWrapperTest
* This shows how to construct a phrase query containing shingles.
*/
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
- Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+ Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
searcher = setUpSearcher(analyzer);
PhraseQuery q = new PhraseQuery();
@@ -161,6 +160,7 @@ public class ShingleAnalyzerWrapperTest
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
while (ts.incrementToken()) {
j += posIncrAtt.getPositionIncrement();
String termText = termAtt.toString();
@@ -178,7 +178,7 @@ public class ShingleAnalyzerWrapperTest
* in the right order and adjacent to each other.
*/
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
- Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+ Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
searcher = setUpSearcher(analyzer);
BooleanQuery q = new BooleanQuery();
@@ -188,6 +188,8 @@ public class ShingleAnalyzerWrapperTest
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
+
while (ts.incrementToken()) {
String termText = termAtt.toString();
q.add(new TermQuery(new Term("content", termText)),
@@ -200,7 +202,7 @@ public class ShingleAnalyzerWrapperTest
}
public void testReusableTokenStream() throws Exception {
- Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+ Analyzer a = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
assertAnalyzesToReuse(a, "please divide into shingles",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
@@ -222,9 +224,9 @@ public class ShingleAnalyzerWrapperTest
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
- return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
else
- return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
}
}
@@ -249,7 +251,7 @@ public class ShingleAnalyzerWrapperTest
public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 4);
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this", "please divide this sentence",
"divide", "divide this sentence", "divide this sentence into",
@@ -273,7 +275,7 @@ public class ShingleAnalyzerWrapperTest
public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 3);
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 3);
assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this",
"divide", "divide this sentence",
@@ -297,7 +299,7 @@ public class ShingleAnalyzerWrapperTest
public void testNoTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setTokenSeparator("");
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
@@ -319,7 +321,7 @@ public class ShingleAnalyzerWrapperTest
public void testNullTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setTokenSeparator(null);
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
@@ -340,7 +342,7 @@ public class ShingleAnalyzerWrapperTest
}
public void testAltTokenSeparator() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setTokenSeparator("<SEP>");
assertAnalyzesToReuse(analyzer, "please divide into shingles",
new String[] { "please", "please<SEP>divide",
@@ -362,7 +364,7 @@ public class ShingleAnalyzerWrapperTest
public void testOutputUnigramsIfNoShinglesSingleToken() throws Exception {
ShingleAnalyzerWrapper analyzer
- = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+ = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
analyzer.setOutputUnigrams(false);
analyzer.setOutputUnigramsIfNoShingles(true);
assertAnalyzesToReuse(analyzer, "please",
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java Wed May 18 12:58:53 2011
@@ -23,15 +23,15 @@ import java.util.Locale;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
+import org.apache.lucene.analysis.MockTokenizer;
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
- TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+ TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
int count = 0;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Wed May 18 12:58:53 2011
@@ -21,15 +21,15 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
+import org.apache.lucene.analysis.MockTokenizer;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
String test = "The quick red fox jumped over the lazy brown dogs";
- TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+ TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
int count = 0;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Wed May 18 12:58:53 2011
@@ -21,9 +21,9 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.TeeSinkTokenFilter.SinkTokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -34,7 +34,7 @@ public class TokenTypeSinkTokenizerTest
TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
String test = "The quick red fox jumped over the lazy brown dogs";
- TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
+ TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
boolean seenDogs = false;
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Wed May 18 12:58:53 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSwedishLightStemFilter
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SwedishLightStemFilter(source));
}
};