You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/01/09 14:27:36 UTC
svn commit: r1556801 [8/10] - in /lucene/dev/trunk: lucene/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/ luc...
Modified: lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java Thu Jan 9 13:27:29 2014
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/** basic tests for {@link ICUTokenizerFactory} **/
@@ -32,7 +33,8 @@ public class TestICUTokenizerFactory ext
Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี This is a test ກວ່າດອກ");
ICUTokenizerFactory factory = new ICUTokenizerFactory(new HashMap<String,String>());
factory.inform(new ClasspathResourceLoader(getClass()));
- TokenStream stream = factory.create(reader);
+ Tokenizer stream = factory.create();
+ stream.setReader(reader);
assertTokenStreamContents(stream,
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
"This", "is", "a", "test", "ກວ່າ", "ດອກ"});
@@ -46,7 +48,8 @@ public class TestICUTokenizerFactory ext
args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
factory.inform(new ClasspathResourceLoader(this.getClass()));
- TokenStream stream = factory.create(reader);
+ Tokenizer stream = factory.create();
+ stream.setReader(reader);
assertTokenStreamContents(stream,
new String[] { "Don't,break.at?/(punct)!", "\u201Cnice\u201D", "85_At:all;", "`really\"", "+2=3$5,&813", "!@#%$^)(*@#$" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<OTHER>" });
@@ -59,7 +62,8 @@ public class TestICUTokenizerFactory ext
args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
factory.inform(new ClasspathResourceLoader(getClass()));
- TokenStream stream = factory.create(reader);
+ Tokenizer stream = factory.create();
+ stream.setReader(reader);
assertTokenStreamContents(stream,
new String[] { "One-two", "punch",
"Brang", "not", "brung-it",
@@ -78,7 +82,8 @@ public class TestICUTokenizerFactory ext
args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
factory.inform(new ClasspathResourceLoader(getClass()));
- TokenStream stream = factory.create(reader);
+ Tokenizer stream = factory.create();
+ stream.setReader(reader);
assertTokenStreamContents(stream, new String[] { "Some", "English",
"Немного русский.  ",
"ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
Modified: lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java Thu Jan 9 13:27:29 2014
@@ -40,8 +40,8 @@ public class TestWithCJKBigramFilter ext
*/
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new ICUTokenizer(reader, new DefaultICUTokenizerConfig(false));
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new ICUTokenizer(new DefaultICUTokenizerConfig(false));
TokenStream result = new CJKBigramFilter(source);
return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET));
}
@@ -55,8 +55,8 @@ public class TestWithCJKBigramFilter ext
*/
private Analyzer analyzer2 = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new ICUTokenizer(reader, new DefaultICUTokenizerConfig(false));
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new ICUTokenizer(new DefaultICUTokenizerConfig(false));
// we put this before the CJKBigramFilter, because the normalization might combine
// some halfwidth katakana forms, which will affect the bigramming.
TokenStream result = new ICUNormalizer2Filter(source);
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -86,8 +86,8 @@ public class JapaneseAnalyzer extends St
}
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(userDict, true, mode);
TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
stream = new CJKWidthFilter(stream);
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu Jan 9 13:27:29 2014
@@ -190,27 +190,25 @@ public final class JapaneseTokenizer ext
* <p>
* Uses the default AttributeFactory.
*
- * @param input Reader containing text
* @param userDictionary Optional: if non-null, user dictionary.
* @param discardPunctuation true if punctuation tokens should be dropped from the output.
* @param mode tokenization mode.
*/
- public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
- this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, userDictionary, discardPunctuation, mode);
+ public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
+ this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
}
/**
* Create a new JapaneseTokenizer.
*
* @param factory the AttributeFactory to use
- * @param input Reader containing text
* @param userDictionary Optional: if non-null, user dictionary.
* @param discardPunctuation true if punctuation tokens should be dropped from the output.
* @param mode tokenization mode.
*/
public JapaneseTokenizer
- (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
- super(factory, input);
+ (AttributeFactory factory, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
+ super(factory);
dictionary = TokenInfoDictionary.getInstance();
fst = dictionary.getFST();
unkDictionary = UnknownDictionary.getInstance();
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java Thu Jan 9 13:27:29 2014
@@ -98,7 +98,7 @@ public class JapaneseTokenizerFactory ex
}
@Override
- public JapaneseTokenizer create(AttributeFactory factory, Reader input) {
- return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
+ public JapaneseTokenizer create(AttributeFactory factory) {
+ return new JapaneseTokenizer(factory, userDictionary, discardPunctuation, mode);
}
}
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java Thu Jan 9 13:27:29 2014
@@ -36,8 +36,8 @@ public class TestExtendedMode extends Ba
private final Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, Mode.EXTENDED);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.EXTENDED);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java Thu Jan 9 13:27:29 2014
@@ -31,8 +31,8 @@ import org.apache.lucene.analysis.util.C
public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.DEFAULT_MODE);
return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
}
};
@@ -47,8 +47,8 @@ public class TestJapaneseBaseFormFilter
final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("あり"), false);
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new JapaneseTokenizer(null, true, JapaneseTokenizer.DEFAULT_MODE);
TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
}
@@ -70,8 +70,8 @@ public class TestJapaneseBaseFormFilter
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -23,6 +23,7 @@ import java.util.HashMap;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
/**
* Simple tests for {@link JapaneseBaseFormFilterFactory}
@@ -31,7 +32,8 @@ public class TestJapaneseBaseFormFilterF
public void testBasics() throws IOException {
JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- TokenStream ts = tokenizerFactory.create(new StringReader("それはまだ実験段階にあります"));
+ TokenStream ts = tokenizerFactory.create();
+ ((Tokenizer)ts).setReader(new StringReader("それはまだ実験段階にあります"));
JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory(new HashMap<String,String>());
ts = factory.create(ts);
assertTokenStreamContents(ts,
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java Thu Jan 9 13:27:29 2014
@@ -31,8 +31,8 @@ public class TestJapaneseIterationMarkCh
private Analyzer keywordAnalyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, tokenizer);
}
@@ -44,8 +44,8 @@ public class TestJapaneseIterationMarkCh
private Analyzer japaneseAnalyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, false, JapaneseTokenizer.Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.SEARCH);
return new TokenStreamComponents(tokenizer, tokenizer);
}
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -36,7 +37,8 @@ public class TestJapaneseIterationMarkCh
final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
CharFilter filter = filterFactory.create(new StringReader(text));
- TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
+ TokenStream tokenStream = new MockTokenizer(MockTokenizer.KEYWORD, false);
+ ((Tokenizer)tokenStream).setReader(filter);
assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
}
@@ -48,7 +50,8 @@ public class TestJapaneseIterationMarkCh
CharFilter filter = filterFactory.create(
new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
);
- TokenStream tokenStream = tokenizerFactory.create(filter);
+ TokenStream tokenStream = tokenizerFactory.create();
+ ((Tokenizer)tokenStream).setReader(filter);
assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"});
}
@@ -64,7 +67,8 @@ public class TestJapaneseIterationMarkCh
CharFilter filter = filterFactory.create(
new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
);
- TokenStream tokenStream = tokenizerFactory.create(filter);
+ TokenStream tokenStream = tokenizerFactory.create();
+ ((Tokenizer)tokenStream).setReader(filter);
assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"});
}
@@ -80,7 +84,8 @@ public class TestJapaneseIterationMarkCh
CharFilter filter = filterFactory.create(
new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
);
- TokenStream tokenStream = tokenizerFactory.create(filter);
+ TokenStream tokenStream = tokenizerFactory.create();
+ ((Tokenizer)tokenStream).setReader(filter);
assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
}
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java Thu Jan 9 13:27:29 2014
@@ -35,9 +35,9 @@ import java.io.Reader;
public class TestJapaneseKatakanaStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ protected TokenStreamComponents createComponents(String fieldName) {
// Use a MockTokenizer here since this filter doesn't really depend on Kuromoji
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(source));
}
};
@@ -68,8 +68,8 @@ public class TestJapaneseKatakanaStemFil
final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("コーヒー"), false);
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
}
@@ -89,8 +89,8 @@ public class TestJapaneseKatakanaStemFil
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ja;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -31,9 +32,8 @@ public class TestJapaneseKatakanaStemFil
public void testKatakanaStemming() throws IOException {
JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- TokenStream tokenStream = tokenizerFactory.create(
new StringReader("明後日パーティーに行く予定がある。図書館で資料をコピーしました。")
- );
+ TokenStream tokenStream = tokenizerFactory.create();
+ ((Tokenizer)tokenStream).setReader(new StringReader("明後日パーティーに行く予定がある。図書館で資料をコピーしました。"));
JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory(new HashMap<String,String>());;
assertTokenStreamContents(filterFactory.create(tokenStream),
new String[]{ "明後日", "パーティ", "に", "行く", "予定", "が", "ある", // パーティー should be stemmed
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
/**
* Simple tests for {@link JapanesePartOfSpeechStopFilterFactory}
@@ -36,7 +37,8 @@ public class TestJapanesePartOfSpeechSto
JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- TokenStream ts = tokenizerFactory.create(new StringReader("私は制限スピードを超える。"));
+ TokenStream ts = tokenizerFactory.create();
+ ((Tokenizer)ts).setReader(new StringReader("私は制限スピードを超える。"));
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
args.put("tags", "stoptags.txt");
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java Thu Jan 9 13:27:29 2014
@@ -34,16 +34,16 @@ import java.util.Random;
public class TestJapaneseReadingFormFilter extends BaseTokenStreamTestCase {
private Analyzer katakanaAnalyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, false));
}
};
private Analyzer romajiAnalyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, true));
}
};
@@ -58,8 +58,8 @@ public class TestJapaneseReadingFormFilt
public void testKatakanaReadingsHalfWidth() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
TokenStream stream = new CJKWidthFilter(tokenizer);
return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false));
}
@@ -78,8 +78,8 @@ public class TestJapaneseReadingFormFilt
public void testRomajiReadingsHalfWidth() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
TokenStream stream = new CJKWidthFilter(tokenizer);
return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, true));
}
@@ -98,8 +98,8 @@ public class TestJapaneseReadingFormFilt
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ja;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import java.io.IOException;
import java.io.StringReader;
@@ -31,7 +32,8 @@ public class TestJapaneseReadingFormFilt
public void testReadings() throws IOException {
JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- TokenStream tokenStream = tokenizerFactory.create(new StringReader("先ほどベルリンから来ました。"));
+ TokenStream tokenStream = tokenizerFactory.create();
+ ((Tokenizer)tokenStream).setReader(new StringReader("先ほどベルリンから来ました。"));
JapaneseReadingFormFilterFactory filterFactory = new JapaneseReadingFormFilterFactory(new HashMap<String,String>());
assertTokenStreamContents(filterFactory.create(tokenStream),
new String[] { "サキ", "ホド", "ベルリン", "カラ", "キ", "マシ", "タ" }
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Thu Jan 9 13:27:29 2014
@@ -61,32 +61,32 @@ public class TestJapaneseTokenizer exten
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.SEARCH);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
private Analyzer analyzerNormal = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.NORMAL);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.NORMAL);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
private Analyzer analyzerNoPunct = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), true, Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(readDict(), true, Mode.SEARCH);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
private Analyzer extendedModeAnalyzerNoPunct = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), true, Mode.EXTENDED);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(readDict(), true, Mode.EXTENDED);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
@@ -201,8 +201,8 @@ public class TestJapaneseTokenizer exten
checkRandomData(random,
new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.SEARCH);
TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
return new TokenStreamComponents(tokenizer, graph);
}
@@ -351,8 +351,8 @@ public class TestJapaneseTokenizer exten
final GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.getInstance());
final Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ JapaneseTokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.SEARCH);
tokenizer.setGraphvizFormatter(gv2);
return new TokenStreamComponents(tokenizer, tokenizer);
}
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java Thu Jan 9 13:27:29 2014
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
/**
* Simple tests for {@link JapaneseTokenizerFactory}
@@ -33,7 +34,8 @@ public class TestJapaneseTokenizerFactor
public void testSimple() throws IOException {
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String,String>());
factory.inform(new StringMockResourceLoader(""));
- TokenStream ts = factory.create(new StringReader("これは本ではない"));
+ TokenStream ts = factory.create();
+ ((Tokenizer)ts).setReader(new StringReader("これは本ではない"));
assertTokenStreamContents(ts,
new String[] { "これ", "は", "本", "で", "は", "ない" },
new int[] { 0, 2, 3, 4, 5, 6 },
@@ -47,7 +49,8 @@ public class TestJapaneseTokenizerFactor
public void testDefaults() throws IOException {
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String,String>());
factory.inform(new StringMockResourceLoader(""));
- TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
+ TokenStream ts = factory.create();
+ ((Tokenizer)ts).setReader(new StringReader("シニアソフトウェアエンジニア"));
assertTokenStreamContents(ts,
new String[] { "シニア", "シニアソフトウェアエンジニア", "ソフトウェア", "エンジニア" }
);
@@ -61,7 +64,8 @@ public class TestJapaneseTokenizerFactor
args.put("mode", "normal");
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
factory.inform(new StringMockResourceLoader(""));
- TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
+ TokenStream ts = factory.create();
+ ((Tokenizer)ts).setReader(new StringReader("シニアソフトウェアエンジニア"));
assertTokenStreamContents(ts,
new String[] { "シニアソフトウェアエンジニア" }
);
@@ -81,7 +85,8 @@ public class TestJapaneseTokenizerFactor
args.put("userDictionary", "userdict.txt");
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
factory.inform(new StringMockResourceLoader(userDict));
- TokenStream ts = factory.create(new StringReader("関西国際空港に行った"));
+ TokenStream ts = factory.create();
+ ((Tokenizer)ts).setReader(new StringReader("関西国際空港に行った"));
assertTokenStreamContents(ts,
new String[] { "関西", "国際", "空港", "に", "行っ", "た" }
);
@@ -95,9 +100,8 @@ public class TestJapaneseTokenizerFactor
args.put("discardPunctuation", "false");
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
factory.inform(new StringMockResourceLoader(""));
- TokenStream ts = factory.create(
- new StringReader("今ノルウェーにいますが、来週の頭日本に戻ります。楽しみにしています！お寿司が食べたいな。。。")
- );
+ TokenStream ts = factory.create();
+ ((Tokenizer)ts).setReader(new StringReader("今ノルウェーにいますが、来週の頭日本に戻ります。楽しみにしています！お寿司が食べたいな。。。"));
assertTokenStreamContents(ts,
new String[] { "今", "ノルウェー", "に", "い", "ます", "が", "、",
"来週", "の", "頭", "日本", "に", "戻り", "ます", "。",
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java Thu Jan 9 13:27:29 2014
@@ -34,8 +34,8 @@ public class TestSearchMode extends Base
private final static String SEGMENTATION_FILENAME = "search-segmentation-tests.txt";
private final Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, Mode.SEARCH);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.SEARCH);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
Modified: lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -49,16 +49,14 @@ public class MorfologikAnalyzer extends
* which tokenizes all the text in the provided {@link Reader}.
*
* @param field ignored field name
- * @param reader source of tokens
- *
* @return A
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter} and {@link MorfologikFilter}.
*/
@Override
- protected TokenStreamComponents createComponents(final String field, final Reader reader) {
- final Tokenizer src = new StandardTokenizer(this.version, reader);
+ protected TokenStreamComponents createComponents(final String field) {
+ final Tokenizer src = new StandardTokenizer(this.version);
return new TokenStreamComponents(
src,
Modified: lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -171,11 +171,11 @@ public class TestMorfologikAnalyzer exte
Analyzer a = new MorfologikAnalyzer(version) {
@Override
- protected TokenStreamComponents createComponents(String field, Reader reader) {
+ protected TokenStreamComponents createComponents(String field) {
final CharArraySet keywords = new CharArraySet(version, 1, false);
keywords.add("liÅcie");
- final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT);
TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
result = new SetKeywordMarkerFilter(result, keywords);
result = new MorfologikFilter(result, TEST_VERSION_CURRENT);
Modified: lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -32,7 +32,7 @@ public class TestMorfologikFilterFactory
public void testCreateDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
}
Modified: lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java Thu Jan 9 13:27:29 2014
@@ -30,44 +30,50 @@ import org.apache.lucene.analysis.core.W
import org.apache.lucene.util._TestUtil;
public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase {
+
+ private TokenStream whitespaceTokenizer(String data) throws IOException {
+ WhitespaceTokenizer whitespaceTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
+ whitespaceTokenizer.setReader(new StringReader(data));
+ return whitespaceTokenizer;
+ }
public void testSize4FalseInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
+ TokenStream stream = whitespaceTokenizer("international");
TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
assertTokenStreamContents(filter, new String[] { "ANTR" });
}
public void testSize4TrueInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
+ TokenStream stream = whitespaceTokenizer("international");
TokenStream filter = new DoubleMetaphoneFilter(stream, 4, true);
assertTokenStreamContents(filter, new String[] { "international", "ANTR" });
}
public void testAlternateInjectFalse() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Kuczewski"));
+ TokenStream stream = whitespaceTokenizer("Kuczewski");
TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
assertTokenStreamContents(filter, new String[] { "KSSK", "KXFS" });
}
public void testSize8FalseInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
+ TokenStream stream = whitespaceTokenizer("international");
TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
assertTokenStreamContents(filter, new String[] { "ANTRNXNL" });
}
public void testNonConvertableStringsWithInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
+ TokenStream stream = whitespaceTokenizer("12345 #$%@#^%&");
TokenStream filter = new DoubleMetaphoneFilter(stream, 8, true);
assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
}
public void testNonConvertableStringsWithoutInject() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
+ TokenStream stream = whitespaceTokenizer("12345 #$%@#^%&");
TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
// should have something after the stream
- stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%& hello"));
+ stream = whitespaceTokenizer("12345 #$%@#^%& hello");
filter = new DoubleMetaphoneFilter(stream, 8, false);
assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&", "HL" });
}
@@ -77,8 +83,8 @@ public class DoubleMetaphoneFilterTest e
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, codeLen, false));
}
@@ -88,8 +94,8 @@ public class DoubleMetaphoneFilterTest e
Analyzer b = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, codeLen, true));
}
@@ -100,8 +106,8 @@ public class DoubleMetaphoneFilterTest e
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random().nextBoolean()));
}
};
Modified: lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Thu Jan 9 13:27:29 2014
@@ -41,8 +41,8 @@ import org.junit.Ignore;
public class TestBeiderMorseFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer,
new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
}
@@ -71,8 +71,8 @@ public class TestBeiderMorseFilter exten
}});
Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer( MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer,
new BeiderMorseFilter(tokenizer,
new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
@@ -101,8 +101,8 @@ public class TestBeiderMorseFilter exten
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
}
};
@@ -110,7 +110,8 @@ public class TestBeiderMorseFilter exten
}
public void testCustomAttribute() throws IOException {
- TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));
+ TokenStream stream = new KeywordTokenizer();
+ ((Tokenizer)stream).setReader(new StringReader("D'Angelo"));
stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenS
public class TestBeiderMorseFilterFactory extends BaseTokenStreamTestCase {
public void testBasics() throws Exception {
BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(new HashMap<String,String>());
- TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+ TokenStream ts = factory.create(whitespaceMockTokenizer("Weinberg"));
assertTokenStreamContents(ts,
new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
new int[] { 0, 0, 0, 0, 0, 0 },
@@ -41,7 +41,7 @@ public class TestBeiderMorseFilterFactor
Map<String,String> args = new HashMap<String,String>();
args.put("languageSet", "polish");
BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
- TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+ TokenStream ts = factory.create(whitespaceMockTokenizer("Weinberg"));
assertTokenStreamContents(ts,
new String[] { "vDmbYrk", "vDmbirk", "vambYrk", "vambirk", "vimbYrk", "vimbirk" },
new int[] { 0, 0, 0, 0, 0, 0 },
@@ -54,7 +54,7 @@ public class TestBeiderMorseFilterFactor
args.put("nameType", "ASHKENAZI");
args.put("ruleType", "EXACT");
BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
- TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+ TokenStream ts = factory.create(whitespaceMockTokenizer("Weinberg"));
assertTokenStreamContents(ts,
new String[] { "vajnberk" },
new int[] { 0 },
Modified: lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -31,7 +31,7 @@ public class TestDoubleMetaphoneFilterFa
public void testDefaults() throws Exception {
DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String, String>());
- TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
+ TokenStream inputStream = whitespaceMockTokenizer("international");
TokenStream filteredStream = factory.create(inputStream);
assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
@@ -44,7 +44,7 @@ public class TestDoubleMetaphoneFilterFa
parameters.put("maxCodeLength", "8");
DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(parameters);
- TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
+ TokenStream inputStream = whitespaceMockTokenizer("international");
TokenStream filteredStream = factory.create(inputStream);
assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
Modified: lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Thu Jan 9 13:27:29 2014
@@ -66,8 +66,8 @@ public class TestPhoneticFilter extends
static void assertAlgorithm(Encoder encoder, boolean inject, String input,
String[] expected) throws Exception {
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader(input));
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT);
+ tokenizer.setReader(new StringReader(input));
PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject);
assertTokenStreamContents(filter, expected);
}
@@ -81,8 +81,8 @@ public class TestPhoneticFilter extends
for (final Encoder e : encoders) {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));
}
};
@@ -91,8 +91,8 @@ public class TestPhoneticFilter extends
Analyzer b = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));
}
};
@@ -108,8 +108,8 @@ public class TestPhoneticFilter extends
for (final Encoder e : encoders) {
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
}
};
Modified: lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -181,7 +181,7 @@ public class TestPhoneticFilterFactory e
static void assertAlgorithm(String algName, String inject, String input,
String[] expected) throws Exception {
- Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+ Tokenizer tokenizer = whitespaceMockTokenizer(input);
Map<String,String> args = new HashMap<String,String>();
args.put("encoder", algName);
args.put("inject", inject);
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Thu Jan 9 13:27:29 2014
@@ -48,12 +48,11 @@ public final class SentenceTokenizer ext
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
- public SentenceTokenizer(Reader reader) {
- super(reader);
+ public SentenceTokenizer() {
}
- public SentenceTokenizer(AttributeFactory factory, Reader reader) {
- super(factory, reader);
+ public SentenceTokenizer(AttributeFactory factory) {
+ super(factory);
}
@Override
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -136,8 +136,8 @@ public final class SmartChineseAnalyzer
}
@Override
- public TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new SentenceTokenizer(reader);
+ public TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new SentenceTokenizer();
TokenStream result = new WordTokenFilter(tokenizer);
// result = new LowerCaseFilter(result);
// LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java Thu Jan 9 13:27:29 2014
@@ -38,7 +38,7 @@ public class SmartChineseSentenceTokeniz
}
@Override
- public SentenceTokenizer create(AttributeFactory factory, Reader input) {
- return new SentenceTokenizer(factory, input);
+ public SentenceTokenizer create(AttributeFactory factory) {
+ return new SentenceTokenizer(factory);
}
}
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -211,8 +211,8 @@ public class TestSmartChineseAnalyzer ex
public void testInvalidOffset() throws Exception {
Analyzer analyzer = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
filters = new WordTokenFilter(filters);
return new TokenStreamComponents(tokenizer, filters);
@@ -240,8 +240,8 @@ public class TestSmartChineseAnalyzer ex
Random random = random();
Analyzer a = new Analyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new WordTokenFilter(tokenizer));
}
};
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseFactories.java Thu Jan 9 13:27:29 2014
@@ -24,6 +24,7 @@ import java.util.HashMap;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
/**
* Tests for {@link SmartChineseSentenceTokenizerFactory} and
@@ -33,7 +34,7 @@ public class TestSmartChineseFactories e
/** Test showing the behavior with whitespace */
public void testSimple() throws Exception {
Reader reader = new StringReader("我购买了道具和服装。");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
SmartChineseWordTokenFilterFactory factory = new SmartChineseWordTokenFilterFactory(new HashMap<String,String>());
stream = factory.create(stream);
// TODO: fix smart chinese to not emit punctuation tokens
@@ -46,7 +47,8 @@ public class TestSmartChineseFactories e
public void testTokenizer() throws Exception {
Reader reader = new StringReader("我购买了道具和服装。我购买了道具和服装。");
SmartChineseSentenceTokenizerFactory tokenizerFactory = new SmartChineseSentenceTokenizerFactory(new HashMap<String,String>());
- TokenStream stream = tokenizerFactory.create(reader);
+ TokenStream stream = tokenizerFactory.create();
+ ((Tokenizer)stream).setReader(reader);
SmartChineseWordTokenFilterFactory factory = new SmartChineseWordTokenFilterFactory(new HashMap<String,String>());
stream = factory.create(stream);
// TODO: fix smart chinese to not emit punctuation tokens
Modified: lucene/dev/trunk/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -139,9 +139,8 @@ public final class PolishAnalyzer extend
* provided and {@link StempelFilter}.
*/
@Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer source = new StandardTokenizer(matchVersion);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
Modified: lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java Thu Jan 9 13:27:29 2014
@@ -32,7 +32,7 @@ public class TestStempelPolishStemFilter
public void testBasics() throws Exception {
Reader reader = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(new HashMap<String,String>());
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stream = whitespaceMockTokenizer(reader);
stream = factory.create(stream);
assertTokenStreamContents(stream,
new String[] { "student", "student" });
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu Jan 9 13:27:29 2014
@@ -45,8 +45,8 @@ public abstract class BaseUIMATokenizer
protected CAS cas;
protected BaseUIMATokenizer
- (AttributeFactory factory, Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
- super(factory, reader);
+ (AttributeFactory factory, String descriptorPath, Map<String, Object> configurationParameters) {
+ super(factory);
this.descriptorPath = descriptorPath;
this.configurationParameters = configurationParameters;
}
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Thu Jan 9 13:27:29 2014
@@ -42,13 +42,13 @@ public final class UIMAAnnotationsTokeni
private int finalOffset = 0;
- public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
- this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
+ public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
+ this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
}
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
- AttributeFactory factory, Reader input) {
- super(factory, input, descriptorPath, configurationParameters);
+ AttributeFactory factory) {
+ super(factory, descriptorPath, configurationParameters);
this.tokenTypeString = tokenType;
this.termAttr = addAttribute(CharTermAttribute.class);
this.offsetAttr = addAttribute(OffsetAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java Thu Jan 9 13:27:29 2014
@@ -42,7 +42,7 @@ public class UIMAAnnotationsTokenizerFac
}
@Override
- public UIMAAnnotationsTokenizer create(AttributeFactory factory, Reader input) {
- return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory, input);
+ public UIMAAnnotationsTokenizer create(AttributeFactory factory) {
+ return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory);
}
}
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -38,8 +38,8 @@ public final class UIMABaseAnalyzer exte
}
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, reader));
+ protected TokenStreamComponents createComponents(String fieldName) {
+ return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters));
}
}
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java Thu Jan 9 13:27:29 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
-import java.io.Reader;
import java.util.Map;
/**
@@ -39,7 +38,7 @@ public final class UIMATypeAwareAnalyzer
}
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, reader));
+ protected TokenStreamComponents createComponents(String fieldName) {
+ return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters));
}
}
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Thu Jan 9 13:27:29 2014
@@ -52,13 +52,13 @@ public final class UIMATypeAwareAnnotati
private int finalOffset = 0;
- public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
- this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
+ public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
+ this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
}
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
- Map<String, Object> configurationParameters, AttributeFactory factory, Reader input) {
- super(factory, input, descriptorPath, configurationParameters);
+ Map<String, Object> configurationParameters, AttributeFactory factory) {
+ super(factory, descriptorPath, configurationParameters);
this.tokenTypeString = tokenType;
this.termAttr = addAttribute(CharTermAttribute.class);
this.typeAttr = addAttribute(TypeAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java Thu Jan 9 13:27:29 2014
@@ -44,8 +44,8 @@ public class UIMATypeAwareAnnotationsTok
}
@Override
- public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory, Reader input) {
+ public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory) {
return new UIMATypeAwareAnnotationsTokenizer
- (descriptorPath, tokenType, featurePath, configurationParameters, factory, input);
+ (descriptorPath, tokenType, featurePath, configurationParameters, factory);
}
}
Modified: lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/AnalyzerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/AnalyzerFactory.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/AnalyzerFactory.java (original)
+++ lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/AnalyzerFactory.java Thu Jan 9 13:27:29 2014
@@ -78,8 +78,8 @@ public final class AnalyzerFactory {
}
@Override
- protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
- final Tokenizer tokenizer = tokenizerFactory.create(reader);
+ protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer tokenizer = tokenizerFactory.create();
TokenStream tokenStream = tokenizer;
for (TokenFilterFactory filterFactory : tokenFilterFactories) {
tokenStream = filterFactory.create(tokenStream);
Modified: lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java?rev=1556801&r1=1556800&r2=1556801&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java (original)
+++ lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java Thu Jan 9 13:27:29 2014
@@ -55,8 +55,8 @@ public class SimpleNaiveBayesClassifierT
private class NGramAnalyzer extends Analyzer {
@Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- final Tokenizer tokenizer = new KeywordTokenizer(reader);
+ protected TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer), 10, 20)));
}
}