You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC
svn commit: r1487777 [10/50] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java Thu May 30 07:53:18 2013
@@ -19,33 +19,21 @@ package org.apache.lucene.analysis.hi;
import java.io.Reader;
import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.in.IndicNormalizationFilterFactory;
-import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
* Simple tests to ensure the Hindi filter Factories are working.
*/
-public class TestHindiFilters extends BaseTokenStreamTestCase {
+public class TestHindiFilters extends BaseTokenStreamFactoryTestCase {
/**
* Test IndicNormalizationFilterFactory
*/
public void testIndicNormalizer() throws Exception {
Reader reader = new StringReader("ত্‍ अाैर");
- StandardTokenizerFactory factory = new StandardTokenizerFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
- filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
- filterFactory.init(args);
- Tokenizer tokenizer = factory.create(reader);
- TokenStream stream = filterFactory.create(tokenizer);
+ TokenStream stream = tokenizerFactory("Standard").create(reader);
+ stream = tokenFilterFactory("IndicNormalization").create(stream);
assertTokenStreamContents(stream, new String[] { "ৎ", "और" });
}
@@ -54,17 +42,9 @@ public class TestHindiFilters extends Ba
*/
public void testHindiNormalizer() throws Exception {
Reader reader = new StringReader("क़िताब");
- StandardTokenizerFactory factory = new StandardTokenizerFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
- HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
- hindiFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
- hindiFilterFactory.init(args);
- Tokenizer tokenizer = factory.create(reader);
- TokenStream stream = indicFilterFactory.create(tokenizer);
- stream = hindiFilterFactory.create(stream);
+ TokenStream stream = tokenizerFactory("Standard").create(reader);
+ stream = tokenFilterFactory("IndicNormalization").create(stream);
+ stream = tokenFilterFactory("HindiNormalization").create(stream);
assertTokenStreamContents(stream, new String[] {"किताब"});
}
@@ -73,19 +53,34 @@ public class TestHindiFilters extends Ba
*/
public void testStemmer() throws Exception {
Reader reader = new StringReader("किताबें");
- StandardTokenizerFactory factory = new StandardTokenizerFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
- HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
- HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
- stemFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
- stemFactory.init(args);
- Tokenizer tokenizer = factory.create(reader);
- TokenStream stream = indicFilterFactory.create(tokenizer);
- stream = hindiFilterFactory.create(stream);
- stream = stemFactory.create(stream);
+ TokenStream stream = tokenizerFactory("Standard").create(reader);
+ stream = tokenFilterFactory("IndicNormalization").create(stream);
+ stream = tokenFilterFactory("HindiNormalization").create(stream);
+ stream = tokenFilterFactory("HindiStem").create(stream);
assertTokenStreamContents(stream, new String[] {"किताब"});
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("IndicNormalization", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+
+ try {
+ tokenFilterFactory("HindiNormalization", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+
+ try {
+ tokenFilterFactory("HindiStem", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java Thu May 30 07:53:18 2013
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -55,7 +55,7 @@ public class TestHungarianLightStemFilte
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+ TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new HungarianLightStemFilter(sink));
}
};
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,28 @@ package org.apache.lucene.analysis.hu;
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
* Simple tests to ensure the Hungarian light stem factory is working.
*/
-public class TestHungarianLightStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestHungarianLightStemFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
Reader reader = new StringReader("házakat");
- HungarianLightStemFilterFactory factory = new HungarianLightStemFilterFactory();
- TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("HungarianLightStem").create(stream);
assertTokenStreamContents(stream, new String[] { "haz" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("HungarianLightStem", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Thu May 30 07:53:18 2013
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -63,7 +63,7 @@ public class HunspellStemFilterTest ext
// assert with keywork marker
tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
- filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY);
+ filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), DICTIONARY);
assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,29 +19,33 @@ package org.apache.lucene.analysis.hunsp
import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
* Simple tests to ensure the Hunspell stemmer loads from factory
*/
-public class TestHunspellStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestHunspellStemFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
- HunspellStemFilterFactory factory = new HunspellStemFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("dictionary", "test.dic");
- args.put("affix", "test.aff");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(new ClasspathResourceLoader(getClass()));
-
Reader reader = new StringReader("abc");
- TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("HunspellStem",
+ "dictionary", "test.dic",
+ "affix", "test.aff").create(stream);
assertTokenStreamContents(stream, new String[] { "ab" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("HunspellStem",
+ "dictionary", "test.dic",
+ "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,28 +19,22 @@ package org.apache.lucene.analysis.id;
import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
* Simple tests to ensure the Indonesian stem filter factory is working.
*/
-public class TestIndonesianStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestIndonesianStemFilterFactory extends BaseTokenStreamFactoryTestCase {
/**
* Ensure the filter actually stems text.
*/
public void testStemming() throws Exception {
Reader reader = new StringReader("dibukukannya");
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- factory.init(args);
- TokenStream stream = factory.create(tokenizer);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("IndonesianStem").create(stream);
assertTokenStreamContents(stream, new String[] { "buku" });
}
@@ -49,12 +43,18 @@ public class TestIndonesianStemFilterFac
*/
public void testStemmingInflectional() throws Exception {
Reader reader = new StringReader("dibukukannya");
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("stemDerivational", "false");
- factory.init(args);
- TokenStream stream = factory.create(tokenizer);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("IndonesianStem", "stemDerivational", "false").create(stream);
assertTokenStreamContents(stream, new String[] { "dibukukan" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("IndonesianStem", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,28 @@ package org.apache.lucene.analysis.it;
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
* Simple tests to ensure the Italian light stem factory is working.
*/
-public class TestItalianLightStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestItalianLightStemFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
Reader reader = new StringReader("ragazzo ragazzi");
- ItalianLightStemFilterFactory factory = new ItalianLightStemFilterFactory();
- TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("ItalianLightStem").create(stream);
assertTokenStreamContents(stream, new String[] { "ragazz", "ragazz" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("ItalianLightStem", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,28 @@ package org.apache.lucene.analysis.lv;
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/**
* Simple tests to ensure the Latvian stem factory is working.
*/
-public class TestLatvianStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestLatvianStemFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testStemming() throws Exception {
Reader reader = new StringReader("tirgiem tirgus");
- LatvianStemFilterFactory factory = new LatvianStemFilterFactory();
- TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("LatvianStem").create(stream);
assertTokenStreamContents(stream, new String[] { "tirg", "tirg" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("LatvianStem", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,127 +17,188 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
+import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-/**
- *
- */
-public class TestCapitalizationFilterFactory extends BaseTokenStreamTestCase {
+public class TestCapitalizationFilterFactory extends BaseTokenStreamFactoryTestCase {
+
+ public void testCapitalization() throws Exception {
+ Reader reader = new StringReader("kiTTEN");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Kitten" });
+ }
+
+ public void testCapitalization2() throws Exception {
+ Reader reader = new StringReader("and");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "And" });
+ }
+
+ /** first is forced, but it's not a keep word, either */
+ public void testCapitalization3() throws Exception {
+ Reader reader = new StringReader("AnD");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "And" });
+ }
+
+ public void testCapitalization4() throws Exception {
+ Reader reader = new StringReader("AnD");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "false").create(stream);
+ assertTokenStreamContents(stream, new String[] { "And" });
+ }
+
+ public void testCapitalization5() throws Exception {
+ Reader reader = new StringReader("big");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Big" });
+ }
+
+ public void testCapitalization6() throws Exception {
+ Reader reader = new StringReader("BIG");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "BIG" });
+ }
+
+ public void testCapitalization7() throws Exception {
+ Reader reader = new StringReader("Hello thEre my Name is Ryan");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
+ }
+
+ public void testCapitalization8() throws Exception {
+ Reader reader = new StringReader("Hello thEre my Name is Ryan");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "false",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
+ }
- public void testCapitalization() throws Exception
- {
- Map<String,String> args = new HashMap<String, String>();
- args.put( CapitalizationFilterFactory.KEEP, "and the it BIG" );
- args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
-
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init( args );
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.WHITESPACE, false)),
- new String[] { "Kitten" });
-
- factory.forceFirstLetter = true;
-
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("and"), MockTokenizer.WHITESPACE, false)),
- new String[] { "And" });
-
- //first is forced, but it's not a keep word, either
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
- new String[] { "And" });
-
- factory.forceFirstLetter = false;
-
- //first is not forced, but it's not a keep word, either
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
- new String[] { "And" });
-
- factory.forceFirstLetter = true;
-
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("big"), MockTokenizer.WHITESPACE, false)),
- new String[] { "Big" });
-
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("BIG"), MockTokenizer.WHITESPACE, false)),
- new String[] { "BIG" });
-
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.KEYWORD, false)),
- new String[] { "Hello there my name is ryan" });
-
- // now each token
- factory.onlyFirstWord = false;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
- new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
-
- // now only the long words
- factory.minWordLength = 3;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
- new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
-
- // without prefix
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
- new String[] { "Mckinley" });
-
- // Now try some prefixes
- factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- args.put( "okPrefix", "McK" ); // all words
- factory.init( args );
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
- new String[] { "McKinley" });
-
- // now try some stuff with numbers
- factory.forceFirstLetter = false;
- factory.onlyFirstWord = false;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("1st 2nd third"), MockTokenizer.WHITESPACE, false)),
- new String[] { "1st", "2nd", "Third" });
-
- factory.forceFirstLetter = true;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("the The the"), MockTokenizer.KEYWORD, false)),
- new String[] { "The The the" });
+ public void testCapitalization9() throws Exception {
+ Reader reader = new StringReader("Hello thEre my Name is Ryan");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "false",
+ "minWordLength", "3",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
+ }
+
+ public void testCapitalization10() throws Exception {
+ Reader reader = new StringReader("McKinley");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "false",
+ "minWordLength", "3",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Mckinley" });
+ }
+
+ /** using "McK" as okPrefix */
+ public void testCapitalization11() throws Exception {
+ Reader reader = new StringReader("McKinley");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "false",
+ "minWordLength", "3",
+ "okPrefix", "McK",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "McKinley" });
+ }
+
+ /** test with numbers */
+ public void testCapitalization12() throws Exception {
+ Reader reader = new StringReader("1st 2nd third");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "false",
+ "minWordLength", "3",
+ "okPrefix", "McK",
+ "forceFirstLetter", "false").create(stream);
+ assertTokenStreamContents(stream, new String[] { "1st", "2nd", "Third" });
+ }
+
+ public void testCapitalization13() throws Exception {
+ Reader reader = new StringReader("the The the");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "and the it BIG",
+ "onlyFirstWord", "false",
+ "minWordLength", "3",
+ "okPrefix", "McK",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "The The the" });
}
public void testKeepIgnoreCase() throws Exception {
- Map<String,String> args = new HashMap<String, String>();
- args.put( CapitalizationFilterFactory.KEEP, "kitten" );
- args.put( CapitalizationFilterFactory.KEEP_IGNORE_CASE, "true" );
- args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
-
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init( args );
- factory.forceFirstLetter = true;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
- new String[] { "KiTTEN" });
-
- factory.forceFirstLetter = false;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
- new String[] { "kiTTEN" });
-
- factory.keep = null;
- assertTokenStreamContents(factory.create(
- new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
- new String[] { "Kitten" });
+ Reader reader = new StringReader("kiTTEN");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "kitten",
+ "keepIgnoreCase", "true",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "true").create(stream);
+
+ assertTokenStreamContents(stream, new String[] { "KiTTEN" });
+ }
+
+ public void testKeepIgnoreCase2() throws Exception {
+ Reader reader = new StringReader("kiTTEN");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "kitten",
+ "keepIgnoreCase", "true",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "false").create(stream);
+
+ assertTokenStreamContents(stream, new String[] { "kiTTEN" });
+ }
+
+ public void testKeepIgnoreCase3() throws Exception {
+ Reader reader = new StringReader("kiTTEN");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keepIgnoreCase", "true",
+ "onlyFirstWord", "true",
+ "forceFirstLetter", "false").create(stream);
+
+ assertTokenStreamContents(stream, new String[] { "Kitten" });
}
/**
@@ -146,16 +207,12 @@ public class TestCapitalizationFilterFac
* This is very weird when combined with ONLY_FIRST_WORD!!!
*/
public void testMinWordLength() throws Exception {
- Map<String,String> args = new HashMap<String,String>();
- args.put(CapitalizationFilterFactory.ONLY_FIRST_WORD, "true");
- args.put(CapitalizationFilterFactory.MIN_WORD_LENGTH, "5");
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- Tokenizer tokenizer = new MockTokenizer(new StringReader(
- "helo testing"), MockTokenizer.WHITESPACE, false);
- TokenStream ts = factory.create(tokenizer);
- assertTokenStreamContents(ts, new String[] {"helo", "Testing"});
+ Reader reader = new StringReader("helo testing");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "onlyFirstWord", "true",
+ "minWordLength", "5").create(stream);
+ assertTokenStreamContents(stream, new String[] { "helo", "Testing" });
}
/**
@@ -163,30 +220,22 @@ public class TestCapitalizationFilterFac
* in each token (it should do nothing)
*/
public void testMaxWordCount() throws Exception {
- Map<String,String> args = new HashMap<String,String>();
- args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- Tokenizer tokenizer = new MockTokenizer(new StringReader(
- "one two three four"), MockTokenizer.WHITESPACE, false);
- TokenStream ts = factory.create(tokenizer);
- assertTokenStreamContents(ts, new String[] {"One", "Two", "Three", "Four"});
+ Reader reader = new StringReader("one two three four");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "maxWordCount", "2").create(stream);
+ assertTokenStreamContents(stream, new String[] { "One", "Two", "Three", "Four" });
}
/**
* Test CapitalizationFilterFactory's maxWordCount option when exceeded
*/
public void testMaxWordCount2() throws Exception {
- Map<String,String> args = new HashMap<String,String>();
- args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- Tokenizer tokenizer = new MockTokenizer(new StringReader(
- "one two three four"), MockTokenizer.KEYWORD, false);
- TokenStream ts = factory.create(tokenizer);
- assertTokenStreamContents(ts, new String[] {"one two three four"});
+ Reader reader = new StringReader("one two three four");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ stream = tokenFilterFactory("Capitalization",
+ "maxWordCount", "2").create(stream);
+ assertTokenStreamContents(stream, new String[] { "one two three four" });
}
/**
@@ -195,29 +244,32 @@ public class TestCapitalizationFilterFac
* This is weird, it is not really a max, but inclusive (look at 'is')
*/
public void testMaxTokenLength() throws Exception {
- Map<String,String> args = new HashMap<String,String>();
- args.put(CapitalizationFilterFactory.MAX_TOKEN_LENGTH, "2");
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- Tokenizer tokenizer = new MockTokenizer(new StringReader(
- "this is a test"), MockTokenizer.WHITESPACE, false);
- TokenStream ts = factory.create(tokenizer);
- assertTokenStreamContents(ts, new String[] {"this", "is", "A", "test"});
+ Reader reader = new StringReader("this is a test");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "maxTokenLength", "2").create(stream);
+ assertTokenStreamContents(stream, new String[] { "this", "is", "A", "test" });
}
/**
* Test CapitalizationFilterFactory's forceFirstLetter option
*/
- public void testForceFirstLetter() throws Exception {
- Map<String,String> args = new HashMap<String,String>();
- args.put(CapitalizationFilterFactory.KEEP, "kitten");
- args.put(CapitalizationFilterFactory.FORCE_FIRST_LETTER, "true");
- CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- Tokenizer tokenizer = new MockTokenizer(new StringReader("kitten"), MockTokenizer.WHITESPACE, false);
- TokenStream ts = factory.create(tokenizer);
- assertTokenStreamContents(ts, new String[] {"Kitten"});
+ public void testForceFirstLetterWithKeep() throws Exception {
+ Reader reader = new StringReader("kitten");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Capitalization",
+ "keep", "kitten",
+ "forceFirstLetter", "true").create(stream);
+ assertTokenStreamContents(stream, new String[] { "Kitten" });
+ }
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("Capitalization", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,45 +17,38 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
-import java.util.Map;
-import java.util.HashMap;
-
-/**
- *
- *
- **/
-public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
+public class TestKeepFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new ClasspathResourceLoader(getClass());
assertTrue("loader is null and it shouldn't be", loader != null);
- KeepWordFilterFactory factory = new KeepWordFilterFactory();
- Map<String, String> args = new HashMap<String, String>();
- args.put("words", "keep-1.txt");
- args.put("ignoreCase", "true");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(loader);
+ KeepWordFilterFactory factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
+ "words", "keep-1.txt",
+ "ignoreCase", "true");
CharArraySet words = factory.getWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
-
- factory = new KeepWordFilterFactory();
- args.put("words", "keep-1.txt, keep-2.txt");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(loader);
+ factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
+ "words", "keep-1.txt, keep-2.txt",
+ "ignoreCase", "true");
words = factory.getWords();
assertTrue("words is null and it shouldn't be", words != null);
assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
-
-
-
+ }
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("KeepWord", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
}
}
\ No newline at end of file
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Thu May 30 07:53:18 2013
@@ -42,23 +42,13 @@ public class TestKeepWordFilter extends
// Test Stopwords
TokenStream stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
- stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+ stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
// Now force case
stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
- stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+ stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
-
- // Test Stopwords
- stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
- stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
- assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
-
- // Now force case
- stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
- stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
- assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
}
/** blast some random strings through the analyzer */
@@ -72,7 +62,7 @@ public class TestKeepWordFilter extends
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- TokenStream stream = new KeepWordFilter(true, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+ TokenStream stream = new KeepWordFilter(TEST_VERSION_CURRENT, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
return new TokenStreamComponents(tokenizer, stream);
}
};
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java Thu May 30 07:53:18 2013
@@ -3,6 +3,7 @@ package org.apache.lucene.analysis.misce
import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
+import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -36,34 +37,68 @@ import org.junit.Test;
public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
@Test
- public void testIncrementToken() throws IOException {
+ public void testSetFilterIncrementToken() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
set.add("lucenefox");
String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
"jumps" };
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false);
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), mixedCaseSet)), output);
CharArraySet set2 = set;
assertTokenStreamContents(new LowerCaseFilterMock(
- new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+ new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
"The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
}
+
+ @Test
+ public void testPatternFilterIncrementToken() throws IOException {
+ String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
+ "jumps" };
+ assertTokenStreamContents(new LowerCaseFilterMock(
+ new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[fF]ox"))), output);
+
+ output = new String[] { "the", "quick", "brown", "lucenefox",
+ "jumps" };
+
+ assertTokenStreamContents(new LowerCaseFilterMock(
+ new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader(
+ "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[f]ox"))), output);
+ }
// LUCENE-2901
public void testComposition() throws Exception {
TokenStream ts = new LowerCaseFilterMock(
- new KeywordMarkerFilter(
- new KeywordMarkerFilter(
+ new SetKeywordMarkerFilter(
+ new SetKeywordMarkerFilter(
new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)),
new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+
+ ts = new LowerCaseFilterMock(
+ new PatternKeywordMarkerFilter(
+ new PatternKeywordMarkerFilter(
+ new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+ Pattern.compile("Birds|Houses")),
+ Pattern.compile("Dogs|Trees")));
+
+ assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+
+ ts = new LowerCaseFilterMock(
+ new SetKeywordMarkerFilter(
+ new PatternKeywordMarkerFilter(
+ new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+ Pattern.compile("Birds|Houses")),
+ new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
+
+ assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
}
public static final class LowerCaseFilterMock extends TokenFilter {
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,52 +17,89 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
/**
* Simple tests to ensure the keyword marker filter factory is working.
*/
-public class TestKeywordMarkerFilterFactory extends BaseTokenStreamTestCase {
- public void testKeywords() throws IOException {
+public class TestKeywordMarkerFilterFactory extends BaseTokenStreamFactoryTestCase {
+
+ public void testKeywords() throws Exception {
+ Reader reader = new StringReader("dogs cats");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+ new StringMockResourceLoader("cats"),
+ "protected", "protwords.txt").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
+ assertTokenStreamContents(stream, new String[] { "dog", "cats" });
+ }
+
+ public void testKeywords2() throws Exception {
Reader reader = new StringReader("dogs cats");
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- ResourceLoader loader = new StringMockResourceLoader("cats");
- args.put("protected", "protwords.txt");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(loader);
-
- TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
- assertTokenStreamContents(ts, new String[] { "dog", "cats" });
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KeywordMarker",
+ "pattern", "cats|Dogs").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
+ assertTokenStreamContents(stream, new String[] { "dog", "cats" });
}
- public void testKeywordsCaseInsensitive() throws IOException {
+ public void testKeywordsMixed() throws Exception {
+ Reader reader = new StringReader("dogs cats birds");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+ new StringMockResourceLoader("cats"),
+ "protected", "protwords.txt",
+ "pattern", "birds|Dogs").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
+ assertTokenStreamContents(stream, new String[] { "dog", "cats", "birds" });
+ }
+
+ public void testKeywordsCaseInsensitive() throws Exception {
Reader reader = new StringReader("dogs cats Cats");
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- ResourceLoader loader = new StringMockResourceLoader("cats");
- args.put("protected", "protwords.txt");
- args.put("ignoreCase", "true");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(loader);
-
- TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
- assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" });
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+ new StringMockResourceLoader("cats"),
+ "protected", "protwords.txt",
+ "ignoreCase", "true").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
+ assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats" });
+ }
+
+ public void testKeywordsCaseInsensitive2() throws Exception {
+ Reader reader = new StringReader("dogs cats Cats");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KeywordMarker",
+ "pattern", "Cats",
+ "ignoreCase", "true").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);;
+ assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats" });
+ }
+
+ public void testKeywordsCaseInsensitiveMixed() throws Exception {
+ Reader reader = new StringReader("dogs cats Cats Birds birds");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+ new StringMockResourceLoader("cats"),
+ "protected", "protwords.txt",
+ "pattern", "birds",
+ "ignoreCase", "true").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
+ assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats", "Birds", "birds" });
+ }
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("KeywordMarker", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Thu May 30 07:53:18 2013
@@ -17,29 +17,23 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+
public class TestLengthFilter extends BaseTokenStreamTestCase {
-
- public void testFilterNoPosIncr() throws Exception {
- TokenStream stream = new MockTokenizer(
- new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
- LengthFilter filter = new LengthFilter(false, stream, 2, 6);
- assertTokenStreamContents(filter,
- new String[]{"short", "ab", "foo"},
- new int[]{1, 1, 1}
- );
- }
public void testFilterWithPosIncr() throws Exception {
TokenStream stream = new MockTokenizer(
new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
- LengthFilter filter = new LengthFilter(true, stream, 2, 6);
+ LengthFilter filter = new LengthFilter(TEST_VERSION_CURRENT, stream, 2, 6);
assertTokenStreamContents(filter,
new String[]{"short", "ab", "foo"},
new int[]{1, 4, 2}
@@ -51,7 +45,7 @@ public class TestLengthFilter extends Ba
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);
- return new TokenStreamComponents(tokenizer, new LengthFilter(true, tokenizer, 0, 5));
+ return new TokenStreamComponents(tokenizer, new LengthFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
}
};
checkOneTermReuse(a, "", "");
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java Thu May 30 07:53:18 2013
@@ -16,35 +16,34 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-public class TestLengthFilterFactory extends BaseTokenStreamTestCase {
+public class TestLengthFilterFactory extends BaseTokenStreamFactoryTestCase {
- public void test() throws IOException {
- LengthFilterFactory factory = new LengthFilterFactory();
- Map<String, String> args = new HashMap<String, String>();
- args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
- args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
- // default: args.put("enablePositionIncrements", "false");
- factory.init(args);
- String test = "foo foobar super-duper-trooper";
- TokenStream stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
- assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
-
- factory = new LengthFilterFactory();
- args = new HashMap<String, String>();
- args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
- args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
- args.put("enablePositionIncrements", "true");
- factory.init(args);
- stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
+ public void testPositionIncrements() throws Exception {
+ Reader reader = new StringReader("foo foobar super-duper-trooper");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Length",
+ "min", "4",
+ "max", "10").create(stream);
assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("Length",
+ "min", "4",
+ "max", "5",
+ "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java Thu May 30 07:53:18 2013
@@ -16,40 +16,46 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-public class TestLimitTokenCountFilterFactory extends BaseTokenStreamTestCase {
+public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTestCase {
- public void test() throws IOException {
- LimitTokenCountFilterFactory factory = new LimitTokenCountFilterFactory();
- Map<String, String> args = new HashMap<String, String>();
- args.put(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3");
- factory.init(args);
- String test = "A1 B2 C3 D4 E5 F6";
- MockTokenizer tok = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false);
+ public void test() throws Exception {
+ Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+ MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
// LimitTokenCountFilter doesn't consume the entire stream that it wraps
- tok.setEnableChecks(false);
- TokenStream stream = factory.create(tok);
+ tokenizer.setEnableChecks(false);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("LimitTokenCount",
+ "maxTokenCount", "3").create(stream);
assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+ }
+ public void testRequired() throws Exception {
// param is required
- factory = new LimitTokenCountFilterFactory();
- args = new HashMap<String, String>();
- IllegalArgumentException iae = null;
try {
- factory.init(args);
+ tokenFilterFactory("LimitTokenCount");
+ fail();
} catch (IllegalArgumentException e) {
assertTrue("exception doesn't mention param: " + e.getMessage(),
0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
- iae = e;
}
- assertNotNull("no exception thrown", iae);
+ }
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("LimitTokenCount",
+ "maxTokenCount", "3",
+ "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,54 +17,24 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-import java.util.Iterator;
-import java.util.Arrays;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
/** Simple tests to ensure this factory is working */
-public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenStreamTestCase {
+public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenStreamFactoryTestCase {
public static Token tok(int pos, String t, int start, int end) {
Token tok = new Token(t,start,end);
tok.setPositionIncrement(pos);
return tok;
}
- public static Token tok(int pos, String t) {
- return tok(pos, t, 0,0);
- }
- public void testDups(final String expected, final Token... tokens)
- throws Exception {
-
- final Iterator<Token> toks = Arrays.asList(tokens).iterator();
- RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
- final TokenStream ts = factory.create
- (new TokenStream() {
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- @Override
- public boolean incrementToken() {
- if (toks.hasNext()) {
- clearAttributes();
- Token tok = toks.next();
- termAtt.setEmpty().append(tok);
- offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
- posIncAtt.setPositionIncrement(tok.getPositionIncrement());
- return true;
- } else {
- return false;
- }
- }
- });
-
- assertTokenStreamContents(ts, expected.split("\\s"));
+ public void testDups(final String expected, final Token... tokens) throws Exception {
+ TokenStream stream = new CannedTokenStream(tokens);
+ stream = tokenFilterFactory("RemoveDuplicates").create(stream);
+ assertTokenStreamContents(stream, expected.split("\\s"));
}
public void testSimpleDups() throws Exception {
@@ -77,4 +47,14 @@ public class TestRemoveDuplicatesTokenFi
,tok(1,"E",21, 25)
);
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("RemoveDuplicates", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Thu May 30 07:53:18 2013
@@ -1,15 +1,4 @@
package org.apache.lucene.analysis.miscellaneous;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -26,17 +15,132 @@ import org.apache.lucene.analysis.Tokeni
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ *
+ */
public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
public void testOverride() throws IOException {
// lets make booked stem to books
// the override filter will convert "booked" to "books",
// but also mark it with KeywordAttribute so Porter will not change it.
- CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
- dictionary.put("booked", "books");
+ StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder();
+ builder.add("booked", "books");
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
- TokenStream stream = new PorterStemFilter(
- new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
- assertTokenStreamContents(stream, new String[] { "books" });
+ TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+ tokenizer, builder.build()));
+ assertTokenStreamContents(stream, new String[] {"books"});
+ }
+
+ public void testIgnoreCase() throws IOException {
+ // lets make booked stem to books
+ // the override filter will convert "booked" to "books",
+ // but also mark it with KeywordAttribute so Porter will not change it.
+ StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
+ builder.add("boOkEd", "books");
+ Tokenizer tokenizer = new KeywordTokenizer(new StringReader("BooKeD"));
+ TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+ tokenizer, builder.build()));
+ assertTokenStreamContents(stream, new String[] {"books"});
+ }
+
+ public void testNoOverrides() throws IOException {
+ StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
+ Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
+ TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+ tokenizer, builder.build()));
+ assertTokenStreamContents(stream, new String[] {"book"});
+ }
+
+ public void testRandomRealisticWhiteSpace() throws IOException {
+ Map<String,String> map = new HashMap<String,String>();
+ int numTerms = atLeast(50);
+ for (int i = 0; i < numTerms; i++) {
+ String randomRealisticUnicodeString = _TestUtil
+ .randomRealisticUnicodeString(random());
+ char[] charArray = randomRealisticUnicodeString.toCharArray();
+ StringBuilder builder = new StringBuilder();
+ for (int j = 0; j < charArray.length;) {
+ int cp = Character.codePointAt(charArray, j);
+ if (!Character.isWhitespace(cp)) {
+ builder.appendCodePoint(cp);
+ }
+ j += Character.charCount(cp);
+ }
+ if (builder.length() > 0) {
+ String value = _TestUtil.randomSimpleString(random());
+ map.put(builder.toString(),
+ value.isEmpty() ? "a" : value);
+
+ }
+ }
+ if (map.isEmpty()) {
+ map.put("booked", "books");
+ }
+ StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+ Set<Entry<String,String>> entrySet = map.entrySet();
+ StringBuilder input = new StringBuilder();
+ List<String> output = new ArrayList<String>();
+ for (Entry<String,String> entry : entrySet) {
+ builder.add(entry.getKey(), entry.getValue());
+ if (random().nextBoolean() || output.isEmpty()) {
+ input.append(entry.getKey()).append(" ");
+ output.add(entry.getValue());
+ }
+ }
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+ new StringReader(input.toString()));
+ TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+ tokenizer, builder.build()));
+ assertTokenStreamContents(stream, output.toArray(new String[0]));
+ }
+
+ public void testRandomRealisticKeyword() throws IOException {
+ Map<String,String> map = new HashMap<String,String>();
+ int numTerms = atLeast(50);
+ for (int i = 0; i < numTerms; i++) {
+ String randomRealisticUnicodeString = _TestUtil
+ .randomRealisticUnicodeString(random());
+ if (randomRealisticUnicodeString.length() > 0) {
+ String value = _TestUtil.randomSimpleString(random());
+ map.put(randomRealisticUnicodeString,
+ value.isEmpty() ? "a" : value);
+ }
+ }
+ if (map.isEmpty()) {
+ map.put("booked", "books");
+ }
+ StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+ Set<Entry<String,String>> entrySet = map.entrySet();
+ for (Entry<String,String> entry : entrySet) {
+ builder.add(entry.getKey(), entry.getValue());
+ }
+ StemmerOverrideMap build = builder.build();
+ for (Entry<String,String> entry : entrySet) {
+ if (random().nextBoolean()) {
+ Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
+ entry.getKey()));
+ TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+ tokenizer, build));
+ assertTokenStreamContents(stream, new String[] {entry.getValue()});
+ }
+ }
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,53 +17,49 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
/**
* Simple tests to ensure the stemmer override filter factory is working.
*/
-public class TestStemmerOverrideFilterFactory extends BaseTokenStreamTestCase {
- public void testKeywords() throws IOException {
+public class TestStemmerOverrideFilterFactory extends BaseTokenStreamFactoryTestCase {
+ public void testKeywords() throws Exception {
// our stemdict stems dogs to 'cat'
Reader reader = new StringReader("testing dogs");
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
- args.put("dictionary", "stemdict.txt");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(loader);
-
- TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
- assertTokenStreamContents(ts, new String[] { "test", "cat" });
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
+ new StringMockResourceLoader("dogs\tcat"),
+ "dictionary", "stemdict.txt").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
+
+ assertTokenStreamContents(stream, new String[] { "test", "cat" });
}
- public void testKeywordsCaseInsensitive() throws IOException {
+ public void testKeywordsCaseInsensitive() throws Exception {
Reader reader = new StringReader("testing DoGs");
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
- args.put("dictionary", "stemdict.txt");
- args.put("ignoreCase", "true");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
- factory.inform(loader);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
+ new StringMockResourceLoader("dogs\tcat"),
+ "dictionary", "stemdict.txt",
+ "ignoreCase", "true").create(stream);
+ stream = tokenFilterFactory("PorterStem").create(stream);
- TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
- assertTokenStreamContents(ts, new String[] { "test", "cat" });
+ assertTokenStreamContents(stream, new String[] { "test", "cat" });
+ }
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ tokenFilterFactory("StemmerOverride", "bogusArg", "bogusValue");
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Thu May 30 07:53:18 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.misce
import java.io.IOException;
import java.io.Reader;
-import java.util.Collection;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -28,7 +27,13 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
/**
*/
@@ -46,30 +51,9 @@ public class TestTrimFilter extends Base
new Token(ccc, 0, ccc.length, 11, 15),
new Token(whitespace, 0, whitespace.length, 16, 20),
new Token(empty, 0, empty.length, 21, 21));
- ts = new TrimFilter(ts, false);
+ ts = new TrimFilter(TEST_VERSION_CURRENT, ts);
assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
-
- a = " a".toCharArray();
- b = "b ".toCharArray();
- ccc = " c ".toCharArray();
- whitespace = " ".toCharArray();
- ts = new IterTokenStream(
- new Token(a, 0, a.length, 0, 2),
- new Token(b, 0, b.length, 0, 2),
- new Token(ccc, 0, ccc.length, 0, 3),
- new Token(whitespace, 0, whitespace.length, 0, 3));
- ts = new TrimFilter(ts, true);
-
- assertTokenStreamContents(ts,
- new String[] { "a", "b", "c", "" },
- new int[] { 1, 0, 1, 3 },
- new int[] { 2, 1, 2, 3 },
- null,
- new int[] { 1, 1, 1, 1 },
- null,
- null,
- false);
}
/**
@@ -91,10 +75,6 @@ public class TestTrimFilter extends Base
this.tokens = tokens;
}
- public IterTokenStream(Collection<Token> tokens) {
- this(tokens.toArray(new Token[tokens.size()]));
- }
-
@Override
public boolean incrementToken() throws IOException {
if (index >= tokens.length)
@@ -120,20 +100,10 @@ public class TestTrimFilter extends Base
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
- return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer, false));
+ return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer));
}
};
checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
-
- Analyzer b = new Analyzer() {
-
- @Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
- return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer, true));
- }
- };
- checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
}
public void testEmptyTerm() throws IOException {
@@ -141,7 +111,8 @@ public class TestTrimFilter extends Base
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);
- return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer, random().nextBoolean()));
+ final Version version = TEST_VERSION_CURRENT;
+ return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer));
}
};
checkOneTermReuse(a, "", "");