Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/18 14:58:55 UTC
svn commit: r1124242 [2/2] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/ lucene/contrib/ lucene/contrib/analyzers/common/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/
lucene/contrib/analyzers/co...
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java Wed May 18 12:58:53 2011
@@ -20,8 +20,8 @@ package org.apache.lucene.analysis.tr;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Test the Turkish lowercase filter.
@@ -32,8 +32,8 @@ public class TestTurkishLowerCaseFilter
* Test composed forms
*/
public void testTurkishLowerCaseFilter() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "\u0130STANBUL \u0130ZM\u0130R ISPARTA"));
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "\u0130STANBUL \u0130ZM\u0130R ISPARTA"), MockTokenizer.WHITESPACE, false);
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
"\u0131sparta",});
@@ -43,8 +43,8 @@ public class TestTurkishLowerCaseFilter
* Test decomposed forms
*/
public void testDecomposed() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"));
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"), MockTokenizer.WHITESPACE, false);
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
"\u0131sparta",});
@@ -56,8 +56,8 @@ public class TestTurkishLowerCaseFilter
* to U+0130 + U+0316, and is lowercased the same way.
*/
public void testDecomposed2() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"));
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"), MockTokenizer.WHITESPACE, false);
TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
"\u0131\u0316sparta",});
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java Wed May 18 12:58:53 2011
@@ -152,8 +152,6 @@ public abstract class ReusableAnalyzerBa
*/
protected boolean reset(final Reader reader) throws IOException {
source.reset(reader);
- if(sink != source)
- sink.reset(); // only reset if the sink reference is different from source
return true;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Wed May 18 12:58:53 2011
@@ -91,7 +91,7 @@ public class MockTokenizer extends Token
endOffset = off;
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
- offsetAtt.setOffset(startOffset, endOffset);
+ offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
streamState = State.INCREMENT;
return true;
}
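A minimal sketch of what the added correctOffset() calls buy, mirroring test1to2 in
the TestMappingCharFilter changes below and assuming that test's normMap fixture
(which maps "j" to "jj"): Tokenizer.correctOffset() asks the wrapping CharStream to
map an offset in the filtered text back into the original input, and is the identity
mapping when reading from a plain Reader.

    import java.io.StringReader;
    import org.apache.lucene.analysis.CharStream;
    import org.apache.lucene.analysis.MappingCharFilter;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.TokenStream;

    CharStream cs = new MappingCharFilter(normMap, new StringReader("j"));
    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
    // The tokenizer sees "jj" (two characters), but because its offsets go through
    // correctOffset() they point back into the one-character original input.
    assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});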
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java Wed May 18 12:58:53 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
import java.util.List;
@@ -27,14 +28,14 @@ public class TestASCIIFoldingFilter exte
// testLain1Accents() is a copy of TestLatin1AccentFilter.testU().
public void testLatin1Accents() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader
+ TokenStream stream = new MockTokenizer(new StringReader
("Des mot clés à LA CHAÃNE à à à à à Ã
à à à à à à à à à à IJ à Ã"
+" à à à à à à Šà à à à à à Ÿ à á â ã ä å æ ç è é ê ë ì à î ï ij"
- +" ð ñ ò ó ô õ ö ø Šà þ ù ú û ü ý ÿ ï¬ ï¬"));
+ +" ð ñ ò ó ô õ ö ø Šà þ ù ú û ü ý ÿ ï¬ ï¬"), MockTokenizer.WHITESPACE, false);
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
-
+ filter.reset();
assertTermEquals("Des", filter, termAtt);
assertTermEquals("mot", filter, termAtt);
assertTermEquals("cles", filter, termAtt);
@@ -1888,10 +1889,11 @@ public class TestASCIIFoldingFilter exte
expectedOutputTokens.add(expected.toString());
}
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(inputText.toString()));
+ TokenStream stream = new MockTokenizer(new StringReader(inputText.toString()), MockTokenizer.WHITESPACE, false);
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
Iterator<String> expectedIter = expectedOutputTokens.iterator();
+ filter.reset();
while (expectedIter.hasNext()) {
assertTermEquals(expectedIter.next(), filter, termAtt);
}
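The added filter.reset() calls reflect the TokenStream consumer contract that
MockTokenizer enforces; tests that pull tokens by hand (here, and via
stpf.reset()/end()/close() in TestStopFilter below) follow the same workflow,
sketched here for an arbitrary TokenStream ts:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    ts.reset();                      // required before the first incrementToken()
    while (ts.incrementToken()) {
      // consume termAtt (and any other attributes) for the current token
    }
    ts.end();                        // records the final offset state
    ts.close();                      // releases the underlying Reader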
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java Wed May 18 12:58:53 2011
@@ -40,17 +40,17 @@ public class TestKeywordMarkerFilter ext
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
"jumps" };
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "The quIck browN LuceneFox Jumps")), set)), output);
+ new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
Set<String> jdkSet = new HashSet<String>();
jdkSet.add("LuceneFox");
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "The quIck browN LuceneFox Jumps")), jdkSet)), output);
+ new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), jdkSet)), output);
Set<?> set2 = set;
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
- "The quIck browN LuceneFox Jumps")), set2)), output);
+ new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
}
// LUCENE-2901
@@ -58,8 +58,7 @@ public class TestKeywordMarkerFilter ext
TokenStream ts = new LowerCaseFilterMock(
new KeywordMarkerFilter(
new KeywordMarkerFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("Dogs Trees Birds Houses")),
+ new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))),
new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java Wed May 18 12:58:53 2011
@@ -17,14 +17,13 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
public class TestLengthFilter extends BaseTokenStreamTestCase {
public void testFilterNoPosIncr() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
+ TokenStream stream = new MockTokenizer(
+ new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
LengthFilter filter = new LengthFilter(false, stream, 2, 6);
assertTokenStreamContents(filter,
new String[]{"short", "ab", "foo"},
@@ -33,8 +32,8 @@ public class TestLengthFilter extends Ba
}
public void testFilterWithPosIncr() throws Exception {
- TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
+ TokenStream stream = new MockTokenizer(
+ new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
LengthFilter filter = new LengthFilter(true, stream, 2, 6);
assertTokenStreamContents(filter,
new String[]{"short", "ab", "foo"},
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java Wed May 18 12:58:53 2011
@@ -58,55 +58,55 @@ public class TestMappingCharFilter exten
public void testNothingChange() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
- TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
}
public void test1to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
}
public void test1to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
}
public void test1to3() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
}
public void test2to4() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
}
public void test2to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
}
public void test3to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
}
public void test4to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
}
public void test5to0() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[0]);
}
@@ -130,7 +130,7 @@ public class TestMappingCharFilter exten
//
public void testTokenStream() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"i","i","jj","kkk","llll","cc","b","a"},
new int[]{0,2,4,6,8,11,16,20},
@@ -151,7 +151,7 @@ public class TestMappingCharFilter exten
public void testChained() throws Exception {
CharStream cs = new MappingCharFilter( normMap,
new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
- TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"a","llllllll","i"},
new int[]{0,5,8},
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java Wed May 18 12:58:53 2011
@@ -23,6 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -36,7 +37,7 @@ public class TestPorterStemFilter extend
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
+ Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t, new PorterStemFilter(t));
}
};
@@ -52,7 +53,7 @@ public class TestPorterStemFilter extend
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("yourselves");
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java Wed May 18 12:58:53 2011
@@ -28,6 +28,9 @@ import java.util.Arrays;
import java.util.Set;
import java.util.HashSet;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
public class TestStopFilter extends BaseTokenStreamTestCase {
@@ -36,36 +39,23 @@ public class TestStopFilter extends Base
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
- TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
- final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.toString());
- assertTrue(stream.incrementToken());
- assertEquals("The", termAtt.toString());
- assertFalse(stream.incrementToken());
+ TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
+ assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<Object> stopWords = new HashSet<Object>(Arrays.asList( "is", "the", "Time" ));
- TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
- final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.toString());
- assertFalse(stream.incrementToken());
+ TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
+ assertTokenStreamContents(stream, new String[] { "Now" });
}
public void testStopFilt() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
- TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
- final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
- assertTrue(stream.incrementToken());
- assertEquals("Now", termAtt.toString());
- assertTrue(stream.incrementToken());
- assertEquals("The", termAtt.toString());
- assertFalse(stream.incrementToken());
+ TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+ assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
/**
@@ -85,11 +75,11 @@ public class TestStopFilter extends Base
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
- StopFilter stpf = new StopFilter(Version.LUCENE_24, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+ StopFilter stpf = new StopFilter(Version.LUCENE_24, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,true);
// without increments
reader = new StringReader(sb.toString());
- stpf = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+ stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,false);
// with increments, concatenating two stop filters
ArrayList<String> a0 = new ArrayList<String>();
@@ -108,7 +98,7 @@ public class TestStopFilter extends Base
Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
reader = new StringReader(sb.toString());
- StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet0); // first part of the set
+ StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true);
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01,true);
@@ -119,6 +109,7 @@ public class TestStopFilter extends Base
stpf.setEnablePositionIncrements(enableIcrements);
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
+ stpf.reset();
for (int i=0; i<20; i+=3) {
assertTrue(stpf.incrementToken());
log("Token "+i+": "+stpf);
@@ -127,6 +118,8 @@ public class TestStopFilter extends Base
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
}
assertFalse(stpf.incrementToken());
+ stpf.end();
+ stpf.close();
}
// print debug info depending on VERBOSE
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java Wed May 18 12:58:53 2011
@@ -67,9 +67,8 @@ public class TestTeeSinkTokenFilter exte
}
};
-
public void testGeneral() throws IOException {
- final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString())));
+ final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
final TokenStream sink1 = source.newSinkTokenStream();
final TokenStream sink2 = source.newSinkTokenStream(theFilter);
@@ -83,16 +82,17 @@ public class TestTeeSinkTokenFilter exte
}
public void testMultipleSources() throws Exception {
- final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString())));
+ final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
+ tee1.reset();
final TokenStream source1 = new CachingTokenFilter(tee1);
tee1.addAttribute(CheckClearAttributesAttribute.class);
dogDetector.addAttribute(CheckClearAttributesAttribute.class);
theDetector.addAttribute(CheckClearAttributesAttribute.class);
- final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer2.toString())));
+ final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false));
tee2.addSinkTokenStream(dogDetector);
tee2.addSinkTokenStream(theDetector);
final TokenStream source2 = tee2;
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java Wed May 18 12:58:53 2011
@@ -49,6 +49,7 @@ public final class CommonGramsQueryFilte
private State previous;
private String previousType;
+ private boolean exhausted;
/**
* Constructs a new CommonGramsQueryFilter based on the provided CommomGramsFilter
@@ -67,6 +68,7 @@ public final class CommonGramsQueryFilte
super.reset();
previous = null;
previousType = null;
+ exhausted = false;
}
/**
@@ -79,7 +81,7 @@ public final class CommonGramsQueryFilte
*/
@Override
public boolean incrementToken() throws IOException {
- while (input.incrementToken()) {
+ while (!exhausted && input.incrementToken()) {
State current = captureState();
if (previous != null && !isGramType()) {
@@ -96,6 +98,8 @@ public final class CommonGramsQueryFilte
previous = current;
}
+ exhausted = true;
+
if (previous == null || GRAM_TYPE.equals(previousType)) {
return false;
}
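The exhausted flag added here (and in HyphenatedWordsFilter and SynonymFilter below)
keeps the filter from calling input.incrementToken() again after the wrapped stream
has already returned false, which stricter test streams treat as a contract
violation. A minimal sketch of the guard, using a hypothetical pass-through filter
named ExhaustedAwareFilter:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;

    public final class ExhaustedAwareFilter extends TokenFilter {
      private boolean exhausted;

      public ExhaustedAwareFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        while (!exhausted && input.incrementToken()) {
          // a real filter would transform or buffer the token here;
          // this sketch simply passes it through
          return true;
        }
        exhausted = true;   // never poll the wrapped stream again
        return false;       // a buffering filter could emit its last pending token here
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        exhausted = false;  // the stream may be reused after reset()
      }
    }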
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java Wed May 18 12:58:53 2011
@@ -59,6 +59,7 @@ public final class HyphenatedWordsFilter
private final StringBuilder hyphenated = new StringBuilder();
private State savedState;
+ private boolean exhausted = false;
/**
* Creates a new HyphenatedWordsFilter
@@ -74,7 +75,7 @@ public final class HyphenatedWordsFilter
*/
@Override
public boolean incrementToken() throws IOException {
- while (input.incrementToken()) {
+ while (!exhausted && input.incrementToken()) {
char[] term = termAttribute.buffer();
int termLength = termAttribute.length();
@@ -96,6 +97,8 @@ public final class HyphenatedWordsFilter
}
}
+ exhausted = true;
+
if (savedState != null) {
// the final term ends with a hyphen
// add back the hyphen, for backwards compatibility.
@@ -115,6 +118,7 @@ public final class HyphenatedWordsFilter
super.reset();
hyphenated.setLength(0);
savedState = null;
+ exhausted = false;
}
// ================================================= Helper Methods ================================================
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java Wed May 18 12:58:53 2011
@@ -191,14 +191,18 @@ public final class SynonymFilter extends
private LinkedList<AttributeSource> buffer;
private LinkedList<AttributeSource> matched;
+ private boolean exhausted;
+
private AttributeSource nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst();
} else {
- if (input.incrementToken()) {
+ if (!exhausted && input.incrementToken()) {
return this;
- } else
+ } else {
+ exhausted = true;
return null;
+ }
}
}
@@ -251,5 +255,6 @@ public final class SynonymFilter extends
public void reset() throws IOException {
input.reset();
replacement = null;
+ exhausted = false;
}
}
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java Wed May 18 12:58:53 2011
@@ -21,6 +21,7 @@ import java.io.StringReader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
@@ -88,7 +89,7 @@ public class CommonGramsFilterTest exten
@Override
public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsQueryFilter(new CommonGramsFilter(
- new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords));
+ new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
}
};
@@ -157,7 +158,7 @@ public class CommonGramsFilterTest exten
@Override
public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsFilter(
- new WhitespaceTokenizer(DEFAULT_VERSION, in), commonWords);
+ new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
}
};
@@ -243,7 +244,7 @@ public class CommonGramsFilterTest exten
*/
public void testCaseSensitive() throws Exception {
final String input = "How The s a brown s cow d like A B thing?";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
Set common = CommonGramsFilter.makeCommonSet(commonWords);
TokenFilter cgf = new CommonGramsFilter(wt, common, false);
assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
@@ -256,7 +257,7 @@ public class CommonGramsFilterTest exten
*/
public void testLastWordisStopWord() throws Exception {
final String input = "dog the";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "dog_the" });
@@ -267,7 +268,7 @@ public class CommonGramsFilterTest exten
*/
public void testFirstWordisStopWord() throws Exception {
final String input = "the dog";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_dog" });
@@ -278,7 +279,7 @@ public class CommonGramsFilterTest exten
*/
public void testOneWordQueryStopWord() throws Exception {
final String input = "the";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the" });
@@ -289,7 +290,7 @@ public class CommonGramsFilterTest exten
*/
public void testOneWordQuery() throws Exception {
final String input = "monster";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "monster" });
@@ -300,7 +301,7 @@ public class CommonGramsFilterTest exten
*/
public void TestFirstAndLastStopWord() throws Exception {
final String input = "the of";
- WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_of" });
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestHyphenatedWordsFilter.java Wed May 18 12:58:53 2011
@@ -19,8 +19,8 @@ package org.apache.solr.analysis;
import java.io.StringReader;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* HyphenatedWordsFilter test
@@ -29,7 +29,7 @@ public class TestHyphenatedWordsFilter e
public void testHyphenatedWords() throws Exception {
String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecologi-\ncal";
// first test
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
HyphenatedWordsFilterFactory factory = new HyphenatedWordsFilterFactory();
ts = factory.create(ts);
assertTokenStreamContents(ts,
@@ -42,7 +42,7 @@ public class TestHyphenatedWordsFilter e
public void testHyphenAtEnd() throws Exception {
String input = "ecologi-\r\ncal devel-\r\n\r\nop compre-\u0009hensive-hands-on and ecology-";
// first test
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ TokenStream ts = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
HyphenatedWordsFilterFactory factory = new HyphenatedWordsFilterFactory();
ts = factory.create(ts);
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestKeepWordFilter.java Wed May 18 12:58:53 2011
@@ -23,8 +23,9 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
@@ -53,9 +54,14 @@ public class TestKeepWordFilter extends
factory.setWords( words );
assertTrue(factory.isIgnoreCase());
assertTrue(factory.isEnablePositionIncrements());
- TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
+ TokenStream stream = factory.create(new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
-
+
+ // Now force case
+ stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+ stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+ assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
+
// Test Stopwords (ignoreCase via the setter instead)
factory = new KeepWordFilterFactory();
args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
@@ -65,7 +71,7 @@ public class TestKeepWordFilter extends
factory.setWords( words );
assertTrue(factory.isIgnoreCase());
assertFalse(factory.isEnablePositionIncrements());
- stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
+ stream = factory.create(new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
// Now force case and posIncr
@@ -78,7 +84,12 @@ public class TestKeepWordFilter extends
factory.setWords( words );
assertFalse(factory.isIgnoreCase());
assertTrue(factory.isEnablePositionIncrements());
- stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
+ stream = factory.create(new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
+
+ // Now force case
+ stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+ stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+ assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
}
}
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java Wed May 18 12:58:53 2011
@@ -25,8 +25,8 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
*
@@ -47,7 +47,7 @@ public class TestPatternReplaceCharFilte
factory.init(args);
CharStream cs = factory.create(
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "this", "is", "test." },
new int[] { 0, 5, 8 },
@@ -64,8 +64,8 @@ public class TestPatternReplaceCharFilte
factory.init(args);
CharStream cs = factory.create(
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
- assertFalse(ts.incrementToken());
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+ assertTokenStreamContents(ts, new String[] {});
}
// 012345678
@@ -80,7 +80,7 @@ public class TestPatternReplaceCharFilte
factory.init(args);
CharStream cs = factory.create(
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa#bb#cc" },
new int[] { 0 },
@@ -95,7 +95,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "aa bb cc dd";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa##bb###cc", "dd" },
new int[] { 0, 9 },
@@ -109,7 +109,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = " a a";
CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "aa" },
new int[] { 1, 4 },
@@ -124,7 +124,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = "aa bb cc dd";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa#bb", "dd" },
new int[] { 0, 12 },
@@ -139,7 +139,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = " aa bb cc --- aa bb aa bb cc";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1 $2 $3",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
@@ -154,7 +154,7 @@ public class TestPatternReplaceCharFilte
final String BLOCK = " aa bb cc --- aa bb aa. bb aa bb cc";
CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2", ".",
CharReader.get( new StringReader( BLOCK ) ) );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
@@ -171,7 +171,7 @@ public class TestPatternReplaceCharFilte
CharReader.get( new StringReader( BLOCK ) ) );
cs = new PatternReplaceCharFilter( pattern("bb"), "b", ".", cs );
cs = new PatternReplaceCharFilter( pattern("ccc"), "c", ".", cs );
- TokenStream ts = new WhitespaceTokenizer(DEFAULT_VERSION, cs );
+ TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
new int[] { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 },
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java Wed May 18 12:58:53 2011
@@ -17,8 +17,8 @@
package org.apache.solr.analysis;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import java.io.StringReader;
import java.util.regex.Pattern;
@@ -31,7 +31,7 @@ public class TestPatternReplaceFilter ex
public void testReplaceAll() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
"-", true);
assertTokenStreamContents(ts,
@@ -41,7 +41,7 @@ public class TestPatternReplaceFilter ex
public void testReplaceFirst() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
"-", false);
assertTokenStreamContents(ts,
@@ -51,7 +51,7 @@ public class TestPatternReplaceFilter ex
public void testStripFirst() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
null, false);
assertTokenStreamContents(ts,
@@ -61,7 +61,7 @@ public class TestPatternReplaceFilter ex
public void testStripAll() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("a*b"),
null, true);
assertTokenStreamContents(ts,
@@ -71,7 +71,7 @@ public class TestPatternReplaceFilter ex
public void testReplaceAllWithBackRef() throws Exception {
String input = "aabfooaabfooabfoob ab caaaaaaaaab";
TokenStream ts = new PatternReplaceFilter
- (new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)),
+ (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
Pattern.compile("(a*)b"),
"$1\\$", true);
assertTokenStreamContents(ts,
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java Wed May 18 12:58:53 2011
@@ -20,7 +20,6 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -35,6 +34,8 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.List;
+import org.apache.lucene.analysis.MockTokenizer;
+
/**
* @version $Id$
*/
@@ -47,14 +48,14 @@ public class TestSynonymFilter extends B
static void assertTokenizesTo(SynonymMap dict, String input,
String expected[]) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected);
}
static void assertTokenizesTo(SynonymMap dict, String input,
String expected[], int posIncs[]) throws IOException {
- Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected, posIncs);
}
Modified: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java?rev=1124242&r1=1124241&r2=1124242&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java Wed May 18 12:58:53 2011
@@ -19,8 +19,8 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
@@ -338,8 +338,8 @@ public class TestWordDelimiterFilter ext
}
public void doSplit(final String input, String... output) throws Exception {
- WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), 1, 1, 0, 0, 0);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
+ new StringReader(input), MockTokenizer.KEYWORD, false), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
assertTokenStreamContents(wdf, output);
}
@@ -380,8 +380,8 @@ public class TestWordDelimiterFilter ext
}
public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
- WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
- new StringReader(input)), 1,1,0,0,0,1,0,1,stemPossessive, null);
+ WordDelimiterFilter wdf = new WordDelimiterFilter(new MockTokenizer(
+ new StringReader(input), MockTokenizer.KEYWORD, false), 1,1,0,0,0,1,0,1,stemPossessive, null);
assertTokenStreamContents(wdf, output);
}
@@ -427,7 +427,7 @@ public class TestWordDelimiterFilter ext
@Override
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
- new WhitespaceTokenizer(DEFAULT_VERSION, reader),
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
}
};
@@ -455,7 +455,7 @@ public class TestWordDelimiterFilter ext
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
new LargePosIncTokenFilter(
- new WhitespaceTokenizer(DEFAULT_VERSION, reader)),
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
}
};
@@ -486,8 +486,8 @@ public class TestWordDelimiterFilter ext
Analyzer a3 = new Analyzer() {
@Override
public TokenStream tokenStream(String field, Reader reader) {
- StopFilter filter = new StopFilter(DEFAULT_VERSION,
- new WhitespaceTokenizer(DEFAULT_VERSION, reader), StandardAnalyzer.STOP_WORDS_SET);
+ StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
+ new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
filter.setEnablePositionIncrements(true);
return new WordDelimiterFilter(filter,
1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);