You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC

svn commit: r1487777 [10/50] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java Thu May 30 07:53:18 2013
@@ -19,33 +19,21 @@ package org.apache.lucene.analysis.hi;
 
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.in.IndicNormalizationFilterFactory;
-import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /**
  * Simple tests to ensure the Hindi filter Factories are working.
  */
-public class TestHindiFilters extends BaseTokenStreamTestCase {
+public class TestHindiFilters extends BaseTokenStreamFactoryTestCase {
   /**
    * Test IndicNormalizationFilterFactory
    */
   public void testIndicNormalizer() throws Exception {
     Reader reader = new StringReader("ত্‍ अाैर");
-    StandardTokenizerFactory factory = new StandardTokenizerFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
-    filterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
-    filterFactory.init(args);
-    Tokenizer tokenizer = factory.create(reader);
-    TokenStream stream = filterFactory.create(tokenizer);
+    TokenStream stream = tokenizerFactory("Standard").create(reader);
+    stream = tokenFilterFactory("IndicNormalization").create(stream);
     assertTokenStreamContents(stream, new String[] { "ৎ", "और" });
   }
   
@@ -54,17 +42,9 @@ public class TestHindiFilters extends Ba
    */
   public void testHindiNormalizer() throws Exception {
     Reader reader = new StringReader("क़िताब");
-    StandardTokenizerFactory factory = new StandardTokenizerFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
-    HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
-    hindiFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
-    hindiFilterFactory.init(args);
-    Tokenizer tokenizer = factory.create(reader);
-    TokenStream stream = indicFilterFactory.create(tokenizer);
-    stream = hindiFilterFactory.create(stream);
+    TokenStream stream = tokenizerFactory("Standard").create(reader);
+    stream = tokenFilterFactory("IndicNormalization").create(stream);
+    stream = tokenFilterFactory("HindiNormalization").create(stream);
     assertTokenStreamContents(stream, new String[] {"किताब"});
   }
   
@@ -73,19 +53,34 @@ public class TestHindiFilters extends Ba
    */
   public void testStemmer() throws Exception {
     Reader reader = new StringReader("किताबें");
-    StandardTokenizerFactory factory = new StandardTokenizerFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
-    HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
-    HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
-    stemFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
-    stemFactory.init(args);
-    Tokenizer tokenizer = factory.create(reader);
-    TokenStream stream = indicFilterFactory.create(tokenizer);
-    stream = hindiFilterFactory.create(stream);
-    stream = stemFactory.create(stream);
+    TokenStream stream = tokenizerFactory("Standard").create(reader);
+    stream = tokenFilterFactory("IndicNormalization").create(stream);
+    stream = tokenFilterFactory("HindiNormalization").create(stream);
+    stream = tokenFilterFactory("HindiStem").create(stream);
     assertTokenStreamContents(stream, new String[] {"किताब"});
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("IndicNormalization", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+    
+    try {
+      tokenFilterFactory("HindiNormalization", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+    
+    try {
+      tokenFilterFactory("HindiStem", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java Thu May 30 07:53:18 2013
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -55,7 +55,7 @@ public class TestHungarianLightStemFilte
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new HungarianLightStemFilter(sink));
       }
     };

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,28 @@ package org.apache.lucene.analysis.hu;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /**
  * Simple tests to ensure the Hungarian light stem factory is working.
  */
-public class TestHungarianLightStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestHungarianLightStemFilterFactory extends BaseTokenStreamFactoryTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("házakat");
-    HungarianLightStemFilterFactory factory = new HungarianLightStemFilterFactory();
-    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("HungarianLightStem").create(stream);
     assertTokenStreamContents(stream, new String[] { "haz" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("HungarianLightStem", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Thu May 30 07:53:18 2013
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -63,7 +63,7 @@ public class HunspellStemFilterTest  ext
     // assert with keywork marker
     tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true);
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
-    filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY);
+    filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), DICTIONARY);
     assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
   }
   

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,29 +19,33 @@ package org.apache.lucene.analysis.hunsp
 
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /**
  * Simple tests to ensure the Hunspell stemmer loads from factory
  */
-public class TestHunspellStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestHunspellStemFilterFactory extends BaseTokenStreamFactoryTestCase {
   public void testStemming() throws Exception {
-    HunspellStemFilterFactory factory = new HunspellStemFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    args.put("dictionary", "test.dic");
-    args.put("affix", "test.aff");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(new ClasspathResourceLoader(getClass()));
-    
     Reader reader = new StringReader("abc");
-    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("HunspellStem",
+        "dictionary", "test.dic",
+        "affix", "test.aff").create(stream);
     assertTokenStreamContents(stream, new String[] { "ab" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("HunspellStem",
+          "dictionary", "test.dic",
+          "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,28 +19,22 @@ package org.apache.lucene.analysis.id;
 
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /**
  * Simple tests to ensure the Indonesian stem filter factory is working.
  */
-public class TestIndonesianStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestIndonesianStemFilterFactory extends BaseTokenStreamFactoryTestCase {
   /**
    * Ensure the filter actually stems text.
    */
   public void testStemming() throws Exception {
     Reader reader = new StringReader("dibukukannya");
-    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    factory.init(args);
-    TokenStream stream = factory.create(tokenizer);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("IndonesianStem").create(stream);
     assertTokenStreamContents(stream, new String[] { "buku" });
   }
   
@@ -49,12 +43,18 @@ public class TestIndonesianStemFilterFac
    */
   public void testStemmingInflectional() throws Exception {
     Reader reader = new StringReader("dibukukannya");
-    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    IndonesianStemFilterFactory factory = new IndonesianStemFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    args.put("stemDerivational", "false");
-    factory.init(args);
-    TokenStream stream = factory.create(tokenizer);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("IndonesianStem", "stemDerivational", "false").create(stream);
     assertTokenStreamContents(stream, new String[] { "dibukukan" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("IndonesianStem", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,28 @@ package org.apache.lucene.analysis.it;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /**
  * Simple tests to ensure the Italian light stem factory is working.
  */
-public class TestItalianLightStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestItalianLightStemFilterFactory extends BaseTokenStreamFactoryTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("ragazzo ragazzi");
-    ItalianLightStemFilterFactory factory = new ItalianLightStemFilterFactory();
-    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("ItalianLightStem").create(stream);
     assertTokenStreamContents(stream, new String[] { "ragazz", "ragazz" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("ItalianLightStem", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,28 @@ package org.apache.lucene.analysis.lv;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /**
  * Simple tests to ensure the Latvian stem factory is working.
  */
-public class TestLatvianStemFilterFactory extends BaseTokenStreamTestCase {
+public class TestLatvianStemFilterFactory extends BaseTokenStreamFactoryTestCase {
   public void testStemming() throws Exception {
     Reader reader = new StringReader("tirgiem tirgus");
-    LatvianStemFilterFactory factory = new LatvianStemFilterFactory();
-    TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("LatvianStem").create(stream);
     assertTokenStreamContents(stream, new String[] { "tirg", "tirg" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("LatvianStem", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,127 +17,188 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
+import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
-/**
- * 
- */
-public class TestCapitalizationFilterFactory extends BaseTokenStreamTestCase {
+public class TestCapitalizationFilterFactory extends BaseTokenStreamFactoryTestCase {
+  
+  public void testCapitalization() throws Exception {
+    Reader reader = new StringReader("kiTTEN");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Kitten" });
+  }
+  
+  public void testCapitalization2() throws Exception {
+    Reader reader = new StringReader("and");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "And" });
+  }
+  
+  /** first is forced, but it's not a keep word, either */
+  public void testCapitalization3() throws Exception {
+    Reader reader = new StringReader("AnD");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "And" });
+  }
+  
+  public void testCapitalization4() throws Exception {
+    Reader reader = new StringReader("AnD");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "false").create(stream);
+    assertTokenStreamContents(stream, new String[] { "And" });
+  }
+  
+  public void testCapitalization5() throws Exception {
+    Reader reader = new StringReader("big");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Big" });
+  }
+  
+  public void testCapitalization6() throws Exception {
+    Reader reader = new StringReader("BIG");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "BIG" });
+  }
+  
+  public void testCapitalization7() throws Exception {
+    Reader reader = new StringReader("Hello thEre my Name is Ryan");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
+  }
+  
+  public void testCapitalization8() throws Exception {
+    Reader reader = new StringReader("Hello thEre my Name is Ryan");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "false",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
+  }
   
-  public void testCapitalization() throws Exception 
-  {
-    Map<String,String> args = new HashMap<String, String>();
-    args.put( CapitalizationFilterFactory.KEEP, "and the it BIG" );
-    args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );  
-    
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init( args );
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "Kitten" });
-    
-    factory.forceFirstLetter = true;
-
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("and"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "And" });
-
-    //first is forced, but it's not a keep word, either
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "And" });
-
-    factory.forceFirstLetter = false;
-
-    //first is not forced, but it's not a keep word, either
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("AnD"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "And" });
-
-    factory.forceFirstLetter = true;
-    
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("big"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "Big" });
-    
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("BIG"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "BIG" });
-
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.KEYWORD, false)),
-        new String[] { "Hello there my name is ryan" });
-        
-    // now each token
-    factory.onlyFirstWord = false;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
-    
-    // now only the long words
-    factory.minWordLength = 3;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
-    
-    // without prefix
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "Mckinley" });
-    
-    // Now try some prefixes
-    factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    args.put( "okPrefix", "McK" );  // all words
-    factory.init( args );
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("McKinley"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "McKinley" });
-    
-    // now try some stuff with numbers
-    factory.forceFirstLetter = false;
-    factory.onlyFirstWord = false;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("1st 2nd third"), MockTokenizer.WHITESPACE, false)),
-        new String[] { "1st", "2nd", "Third" });
-    
-    factory.forceFirstLetter = true;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("the The the"), MockTokenizer.KEYWORD, false)),
-        new String[] { "The The the" });
+  public void testCapitalization9() throws Exception {
+    Reader reader = new StringReader("Hello thEre my Name is Ryan");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "false",
+        "minWordLength", "3",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
+  }
+  
+  public void testCapitalization10() throws Exception {
+    Reader reader = new StringReader("McKinley");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "false",
+        "minWordLength", "3",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Mckinley" });
+  }
+  
+  /** using "McK" as okPrefix */
+  public void testCapitalization11() throws Exception {
+    Reader reader = new StringReader("McKinley");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "false",
+        "minWordLength", "3",
+        "okPrefix", "McK",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "McKinley" });
+  }
+  
+  /** test with numbers */
+  public void testCapitalization12() throws Exception {
+    Reader reader = new StringReader("1st 2nd third");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "false",
+        "minWordLength", "3",
+        "okPrefix", "McK",
+        "forceFirstLetter", "false").create(stream);
+    assertTokenStreamContents(stream, new String[] { "1st", "2nd", "Third" });
+  }
+  
+  public void testCapitalization13() throws Exception {
+    Reader reader = new StringReader("the The the");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "and the it BIG",
+        "onlyFirstWord", "false",
+        "minWordLength", "3",
+        "okPrefix", "McK",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "The The the" });
   }
 
   public void testKeepIgnoreCase() throws Exception {
-    Map<String,String> args = new HashMap<String, String>();
-    args.put( CapitalizationFilterFactory.KEEP, "kitten" );
-    args.put( CapitalizationFilterFactory.KEEP_IGNORE_CASE, "true" );
-    args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
-
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init( args );
-    factory.forceFirstLetter = true;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
-        new String[] { "KiTTEN" });
-
-    factory.forceFirstLetter = false;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
-        new String[] { "kiTTEN" });
-
-    factory.keep = null;
-    assertTokenStreamContents(factory.create(
-        new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false)),
-        new String[] { "Kitten" });
+    Reader reader = new StringReader("kiTTEN");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "kitten",
+        "keepIgnoreCase", "true",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "true").create(stream);
+
+    assertTokenStreamContents(stream, new String[] { "KiTTEN" });
+  }
+  
+  public void testKeepIgnoreCase2() throws Exception {
+    Reader reader = new StringReader("kiTTEN");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "kitten",
+        "keepIgnoreCase", "true",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "false").create(stream);
+
+    assertTokenStreamContents(stream, new String[] { "kiTTEN" });
+  }
+  
+  public void testKeepIgnoreCase3() throws Exception {
+    Reader reader = new StringReader("kiTTEN");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keepIgnoreCase", "true",
+        "onlyFirstWord", "true",
+        "forceFirstLetter", "false").create(stream);
+
+    assertTokenStreamContents(stream, new String[] { "Kitten" });
   }
   
   /**
@@ -146,16 +207,12 @@ public class TestCapitalizationFilterFac
    * This is very weird when combined with ONLY_FIRST_WORD!!!
    */
   public void testMinWordLength() throws Exception {
-    Map<String,String> args = new HashMap<String,String>();
-    args.put(CapitalizationFilterFactory.ONLY_FIRST_WORD, "true");
-    args.put(CapitalizationFilterFactory.MIN_WORD_LENGTH, "5");
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    Tokenizer tokenizer = new MockTokenizer(new StringReader(
-        "helo testing"), MockTokenizer.WHITESPACE, false);
-    TokenStream ts = factory.create(tokenizer);
-    assertTokenStreamContents(ts, new String[] {"helo", "Testing"});
+    Reader reader = new StringReader("helo testing");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "onlyFirstWord", "true",
+        "minWordLength", "5").create(stream);
+    assertTokenStreamContents(stream, new String[] { "helo", "Testing" });
   }
   
   /**
@@ -163,30 +220,22 @@ public class TestCapitalizationFilterFac
    * in each token (it should do nothing)
    */
   public void testMaxWordCount() throws Exception {
-    Map<String,String> args = new HashMap<String,String>();
-    args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    Tokenizer tokenizer = new MockTokenizer(new StringReader(
-        "one two three four"), MockTokenizer.WHITESPACE, false);
-    TokenStream ts = factory.create(tokenizer);
-    assertTokenStreamContents(ts, new String[] {"One", "Two", "Three", "Four"});
+    Reader reader = new StringReader("one two three four");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "maxWordCount", "2").create(stream);
+    assertTokenStreamContents(stream, new String[] { "One", "Two", "Three", "Four" });
   }
   
   /**
    * Test CapitalizationFilterFactory's maxWordCount option when exceeded
    */
   public void testMaxWordCount2() throws Exception {
-    Map<String,String> args = new HashMap<String,String>();
-    args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    Tokenizer tokenizer = new MockTokenizer(new StringReader(
-        "one two three four"), MockTokenizer.KEYWORD, false);
-    TokenStream ts = factory.create(tokenizer);
-    assertTokenStreamContents(ts, new String[] {"one two three four"});
+    Reader reader = new StringReader("one two three four");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+    stream = tokenFilterFactory("Capitalization",
+        "maxWordCount", "2").create(stream);
+    assertTokenStreamContents(stream, new String[] { "one two three four" });
   }
   
   /**
@@ -195,29 +244,32 @@ public class TestCapitalizationFilterFac
    * This is weird, it is not really a max, but inclusive (look at 'is')
    */
   public void testMaxTokenLength() throws Exception {
-    Map<String,String> args = new HashMap<String,String>();
-    args.put(CapitalizationFilterFactory.MAX_TOKEN_LENGTH, "2");
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    Tokenizer tokenizer = new MockTokenizer(new StringReader(
-        "this is a test"), MockTokenizer.WHITESPACE, false);
-    TokenStream ts = factory.create(tokenizer);
-    assertTokenStreamContents(ts, new String[] {"this", "is", "A", "test"});
+    Reader reader = new StringReader("this is a test");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "maxTokenLength", "2").create(stream);
+    assertTokenStreamContents(stream, new String[] { "this", "is", "A", "test" });
   }
   
   /**
    * Test CapitalizationFilterFactory's forceFirstLetter option
    */
-  public void testForceFirstLetter() throws Exception {
-    Map<String,String> args = new HashMap<String,String>();
-    args.put(CapitalizationFilterFactory.KEEP, "kitten");
-    args.put(CapitalizationFilterFactory.FORCE_FIRST_LETTER, "true");
-    CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    Tokenizer tokenizer = new MockTokenizer(new StringReader("kitten"), MockTokenizer.WHITESPACE, false);
-    TokenStream ts = factory.create(tokenizer);
-    assertTokenStreamContents(ts, new String[] {"Kitten"});
+  public void testForceFirstLetterWithKeep() throws Exception {
+    Reader reader = new StringReader("kitten");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Capitalization",
+        "keep", "kitten",
+        "forceFirstLetter", "true").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Kitten" });
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("Capitalization", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,45 +17,38 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 
-import java.util.Map;
-import java.util.HashMap;
-
-/**
- *
- *
- **/
-public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
+public class TestKeepFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void testInform() throws Exception {
     ResourceLoader loader = new ClasspathResourceLoader(getClass());
     assertTrue("loader is null and it shouldn't be", loader != null);
-    KeepWordFilterFactory factory = new KeepWordFilterFactory();
-    Map<String, String> args = new HashMap<String, String>();
-    args.put("words", "keep-1.txt");
-    args.put("ignoreCase", "true");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(loader);
+    KeepWordFilterFactory factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
+        "words", "keep-1.txt",
+        "ignoreCase", "true");
     CharArraySet words = factory.getWords();
     assertTrue("words is null and it shouldn't be", words != null);
     assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
 
-
-    factory = new KeepWordFilterFactory();
-    args.put("words", "keep-1.txt, keep-2.txt");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(loader);
+    factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
+        "words", "keep-1.txt, keep-2.txt",
+        "ignoreCase", "true");
     words = factory.getWords();
     assertTrue("words is null and it shouldn't be", words != null);
     assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
-
-
-
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("KeepWord", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
   }
 }
\ No newline at end of file

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Thu May 30 07:53:18 2013
@@ -42,23 +42,13 @@ public class TestKeepWordFilter extends 
     
     // Test Stopwords
     TokenStream stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-    stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+    stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
     assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
        
     // Now force case
     stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-    stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+    stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
     assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
-    
-    // Test Stopwords
-    stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-    stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
-    assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
-       
-    // Now force case
-    stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-    stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
-    assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
   }
   
   /** blast some random strings through the analyzer */
@@ -72,7 +62,7 @@ public class TestKeepWordFilter extends 
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream stream = new KeepWordFilter(true, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+        TokenStream stream = new KeepWordFilter(TEST_VERSION_CURRENT, tokenizer, new CharArraySet(TEST_VERSION_CURRENT, words, true));
         return new TokenStreamComponents(tokenizer, stream);
       }
     };

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java Thu May 30 07:53:18 2013
@@ -3,6 +3,7 @@ package org.apache.lucene.analysis.misce
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.Locale;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -36,34 +37,68 @@ import org.junit.Test;
 public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
 
   @Test
-  public void testIncrementToken() throws IOException {
+  public void testSetFilterIncrementToken() throws IOException {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
     set.add("lucenefox");
     String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
         "jumps" };
     assertTokenStreamContents(new LowerCaseFilterMock(
-        new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+        new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
             "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output);
     CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false);
     assertTokenStreamContents(new LowerCaseFilterMock(
-        new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+        new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
             "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), mixedCaseSet)), output);
     CharArraySet set2 = set;
     assertTokenStreamContents(new LowerCaseFilterMock(
-        new KeywordMarkerFilter(new MockTokenizer(new StringReader(
+        new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(
             "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output);
   }
+  
+  @Test
+  public void testPatternFilterIncrementToken() throws IOException {
+    String[] output = new String[] { "the", "quick", "brown", "LuceneFox",
+        "jumps" };
+    assertTokenStreamContents(new LowerCaseFilterMock(
+        new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader(
+            "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[fF]ox"))), output);
+    
+    output = new String[] { "the", "quick", "brown", "lucenefox",
+    "jumps" };
+    
+    assertTokenStreamContents(new LowerCaseFilterMock(
+        new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader(
+            "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[f]ox"))), output);
+  }
 
   // LUCENE-2901
   public void testComposition() throws Exception {   
     TokenStream ts = new LowerCaseFilterMock(
-                     new KeywordMarkerFilter(
-                     new KeywordMarkerFilter(
+                     new SetKeywordMarkerFilter(
+                     new SetKeywordMarkerFilter(
                      new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
                      new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)), 
                      new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
     
     assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+    
+    ts = new LowerCaseFilterMock(
+        new PatternKeywordMarkerFilter(
+        new PatternKeywordMarkerFilter(
+        new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+        Pattern.compile("Birds|Houses")), 
+        Pattern.compile("Dogs|Trees")));
+
+    assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+    
+    ts = new LowerCaseFilterMock(
+        new SetKeywordMarkerFilter(
+        new PatternKeywordMarkerFilter(
+        new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false),
+        Pattern.compile("Birds|Houses")), 
+        new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false)));
+
+    assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
   }
   
   public static final class LowerCaseFilterMock extends TokenFilter {

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,52 +17,89 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
 
 /**
  * Simple tests to ensure the keyword marker filter factory is working.
  */
-public class TestKeywordMarkerFilterFactory extends BaseTokenStreamTestCase {
-  public void testKeywords() throws IOException {
+public class TestKeywordMarkerFilterFactory extends BaseTokenStreamFactoryTestCase {
+  
+  public void testKeywords() throws Exception {
+    Reader reader = new StringReader("dogs cats");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+        new StringMockResourceLoader("cats"),
+        "protected", "protwords.txt").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+    assertTokenStreamContents(stream, new String[] { "dog", "cats" });
+  }
+  
+  public void testKeywords2() throws Exception {
     Reader reader = new StringReader("dogs cats");
-    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    ResourceLoader loader = new StringMockResourceLoader("cats");
-    args.put("protected", "protwords.txt");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(loader);
-    
-    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
-    assertTokenStreamContents(ts, new String[] { "dog", "cats" });
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("KeywordMarker",
+        "pattern", "cats|Dogs").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+    assertTokenStreamContents(stream, new String[] { "dog", "cats" });
   }
   
-  public void testKeywordsCaseInsensitive() throws IOException {
+  public void testKeywordsMixed() throws Exception {
+    Reader reader = new StringReader("dogs cats birds");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+        new StringMockResourceLoader("cats"),
+        "protected", "protwords.txt",
+        "pattern", "birds|Dogs").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+    assertTokenStreamContents(stream, new String[] { "dog", "cats", "birds" });
+  }
+  
+  public void testKeywordsCaseInsensitive() throws Exception {
     Reader reader = new StringReader("dogs cats Cats");
-    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    ResourceLoader loader = new StringMockResourceLoader("cats");
-    args.put("protected", "protwords.txt");
-    args.put("ignoreCase", "true");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(loader);
-    
-    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
-    assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" });
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+        new StringMockResourceLoader("cats"),
+        "protected", "protwords.txt",
+        "ignoreCase", "true").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+    assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats" });
+  }
+  
+  public void testKeywordsCaseInsensitive2() throws Exception {
+    Reader reader = new StringReader("dogs cats Cats");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("KeywordMarker",
+        "pattern", "Cats",
+        "ignoreCase", "true").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+    assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats" });
+  }
+  
+  public void testKeywordsCaseInsensitiveMixed() throws Exception {
+    Reader reader = new StringReader("dogs cats Cats Birds birds");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
+        new StringMockResourceLoader("cats"),
+        "protected", "protwords.txt",
+        "pattern", "birds",
+        "ignoreCase", "true").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+    assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats", "Birds", "birds" });
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("KeywordMarker", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Thu May 30 07:53:18 2013
@@ -17,29 +17,23 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+
 public class TestLengthFilter extends BaseTokenStreamTestCase {
-  
-  public void testFilterNoPosIncr() throws Exception {
-    TokenStream stream = new MockTokenizer(
-        new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
-    LengthFilter filter = new LengthFilter(false, stream, 2, 6);
-    assertTokenStreamContents(filter,
-      new String[]{"short", "ab", "foo"},
-      new int[]{1, 1, 1}
-    );
-  }
 
   public void testFilterWithPosIncr() throws Exception {
     TokenStream stream = new MockTokenizer(
         new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
-    LengthFilter filter = new LengthFilter(true, stream, 2, 6);
+    LengthFilter filter = new LengthFilter(TEST_VERSION_CURRENT, stream, 2, 6);
     assertTokenStreamContents(filter,
       new String[]{"short", "ab", "foo"},
       new int[]{1, 4, 2}
@@ -51,7 +45,7 @@ public class TestLengthFilter extends Ba
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer tokenizer = new KeywordTokenizer(reader);
-        return new TokenStreamComponents(tokenizer, new LengthFilter(true, tokenizer, 0, 5));
+        return new TokenStreamComponents(tokenizer, new LengthFilter(TEST_VERSION_CURRENT, tokenizer, 0, 5));
       }
     };
     checkOneTermReuse(a, "", "");

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java Thu May 30 07:53:18 2013
@@ -16,35 +16,34 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
-public class TestLengthFilterFactory extends BaseTokenStreamTestCase {
+public class TestLengthFilterFactory extends BaseTokenStreamFactoryTestCase {
 
-  public void test() throws IOException {
-    LengthFilterFactory factory = new LengthFilterFactory();
-    Map<String, String> args = new HashMap<String, String>();
-    args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
-    args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
-    // default: args.put("enablePositionIncrements", "false");
-    factory.init(args);
-    String test = "foo foobar super-duper-trooper";
-    TokenStream stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
-    assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
-
-    factory = new LengthFilterFactory();
-    args = new HashMap<String, String>();
-    args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
-    args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
-    args.put("enablePositionIncrements", "true");
-    factory.init(args);
-    stream = factory.create(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
+  public void testPositionIncrements() throws Exception {
+    Reader reader = new StringReader("foo foobar super-duper-trooper");
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("Length",
+        "min", "4",
+        "max", "10").create(stream);
     assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("Length", 
+          "min", "4", 
+          "max", "5", 
+          "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java Thu May 30 07:53:18 2013
@@ -16,40 +16,46 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
-public class TestLimitTokenCountFilterFactory extends BaseTokenStreamTestCase {
+public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTestCase {
 
-  public void test() throws IOException {
-    LimitTokenCountFilterFactory factory = new LimitTokenCountFilterFactory();
-    Map<String, String> args = new HashMap<String, String>();
-    args.put(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3");
-    factory.init(args);
-    String test = "A1 B2 C3 D4 E5 F6";
-    MockTokenizer tok = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false);
+  public void test() throws Exception {
+    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     // LimitTokenCountFilter doesn't consume the entire stream that it wraps
-    tok.setEnableChecks(false); 
-    TokenStream stream = factory.create(tok);
+    tokenizer.setEnableChecks(false);
+    TokenStream stream = tokenizer;
+    stream = tokenFilterFactory("LimitTokenCount",
+        "maxTokenCount", "3").create(stream);
     assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+  }
 
+  public void testRequired() throws Exception {
     // param is required
-    factory = new LimitTokenCountFilterFactory();
-    args = new HashMap<String, String>();
-    IllegalArgumentException iae = null;
     try {
-      factory.init(args);
+      tokenFilterFactory("LimitTokenCount");
+      fail();
     } catch (IllegalArgumentException e) {
       assertTrue("exception doesn't mention param: " + e.getMessage(),
                  0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
-      iae = e;
     }
-    assertNotNull("no exception thrown", iae);
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("LimitTokenCount", 
+          "maxTokenCount", "3", 
+          "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,54 +17,24 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-import java.util.Iterator;
-import java.util.Arrays;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
 /** Simple tests to ensure this factory is working */
-public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenStreamTestCase {
+public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public static Token tok(int pos, String t, int start, int end) {
     Token tok = new Token(t,start,end);
     tok.setPositionIncrement(pos);
     return tok;
   }
-  public static Token tok(int pos, String t) {
-    return tok(pos, t, 0,0);
-  }
 
-  public void testDups(final String expected, final Token... tokens)
-    throws Exception {
-
-    final Iterator<Token> toks = Arrays.asList(tokens).iterator();
-    RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
-    final TokenStream ts = factory.create
-      (new TokenStream() {
-          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-          OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-          PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-          @Override
-          public boolean incrementToken() {
-            if (toks.hasNext()) {
-              clearAttributes();
-              Token tok = toks.next();
-              termAtt.setEmpty().append(tok);
-              offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
-              posIncAtt.setPositionIncrement(tok.getPositionIncrement());
-              return true;
-            } else {
-              return false;
-            }
-          }
-        });
-    
-    assertTokenStreamContents(ts, expected.split("\\s"));   
+  public void testDups(final String expected, final Token... tokens) throws Exception {
+    TokenStream stream = new CannedTokenStream(tokens);
+    stream = tokenFilterFactory("RemoveDuplicates").create(stream);
+    assertTokenStreamContents(stream, expected.split("\\s"));   
   }
  
   public void testSimpleDups() throws Exception {
@@ -77,4 +47,14 @@ public class TestRemoveDuplicatesTokenFi
              ,tok(1,"E",21, 25)
              ); 
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("RemoveDuplicates", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Thu May 30 07:53:18 2013
@@ -1,15 +1,4 @@
 package org.apache.lucene.analysis.miscellaneous;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -26,17 +15,132 @@ import org.apache.lucene.analysis.Tokeni
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests for {@link StemmerOverrideFilter}: explicit overrides, case-insensitive matching, and randomized inputs.
+ */
 public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
   public void testOverride() throws IOException {
     // lets make booked stem to books
     // the override filter will convert "booked" to "books",
     // but also mark it with KeywordAttribute so Porter will not change it.
-    CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
-    dictionary.put("booked", "books");
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder();
+    builder.add("booked", "books");
     Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
-    TokenStream stream = new PorterStemFilter(
-        new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
-    assertTokenStreamContents(stream, new String[] { "books" });
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, new String[] {"books"});
+  }
+  
+  public void testIgnoreCase() throws IOException {
+    // lets make booked stem to books
+    // the override filter will convert "booked" to "books",
+    // but also mark it with KeywordAttribute so Porter will not change it.
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
+    builder.add("boOkEd", "books");
+    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("BooKeD"));
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, new String[] {"books"});
+  }
+
+  public void testNoOverrides() throws IOException {
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
+    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, new String[] {"book"});
+  }
+  
+  public void testRandomRealisticWhiteSpace() throws IOException {
+    Map<String,String> map = new HashMap<String,String>();
+    int numTerms = atLeast(50);
+    for (int i = 0; i < numTerms; i++) {
+      String randomRealisticUnicodeString = _TestUtil
+          .randomRealisticUnicodeString(random());
+      char[] charArray = randomRealisticUnicodeString.toCharArray();
+      StringBuilder builder = new StringBuilder();
+      for (int j = 0; j < charArray.length;) {
+        int cp = Character.codePointAt(charArray, j);
+        if (!Character.isWhitespace(cp)) {
+          builder.appendCodePoint(cp);
+        }
+        j += Character.charCount(cp);
+      }
+      if (builder.length() > 0) {
+        String value = _TestUtil.randomSimpleString(random());
+        map.put(builder.toString(),
+            value.isEmpty() ? "a" : value);
+        
+      }
+    }
+    if (map.isEmpty()) {
+      map.put("booked", "books");
+    }
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+    Set<Entry<String,String>> entrySet = map.entrySet();
+    StringBuilder input = new StringBuilder();
+    List<String> output = new ArrayList<String>();
+    for (Entry<String,String> entry : entrySet) {
+      builder.add(entry.getKey(), entry.getValue());
+      if (random().nextBoolean() || output.isEmpty()) {
+        input.append(entry.getKey()).append(" ");
+        output.add(entry.getValue());
+      }
+    }
+    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+        new StringReader(input.toString()));
+    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+        tokenizer, builder.build()));
+    assertTokenStreamContents(stream, output.toArray(new String[0]));
+  }
+  
+  public void testRandomRealisticKeyword() throws IOException {
+    Map<String,String> map = new HashMap<String,String>();
+    int numTerms = atLeast(50);
+    for (int i = 0; i < numTerms; i++) {
+      String randomRealisticUnicodeString = _TestUtil
+          .randomRealisticUnicodeString(random());
+      if (randomRealisticUnicodeString.length() > 0) {
+        String value = _TestUtil.randomSimpleString(random());
+        map.put(randomRealisticUnicodeString,
+            value.isEmpty() ? "a" : value);
+      }
+    }
+    if (map.isEmpty()) {
+      map.put("booked", "books");
+    }
+    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
+    Set<Entry<String,String>> entrySet = map.entrySet();
+    for (Entry<String,String> entry : entrySet) {
+      builder.add(entry.getKey(), entry.getValue());
+    }
+    StemmerOverrideMap build = builder.build();
+    for (Entry<String,String> entry : entrySet) {
+      if (random().nextBoolean()) {
+        Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
+            entry.getKey()));
+        TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
+            tokenizer, build));
+        assertTokenStreamContents(stream, new String[] {entry.getValue()});
+      }
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,53 +17,49 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
 
 /**
  * Simple tests to ensure the stemmer override filter factory is working.
  */
-public class TestStemmerOverrideFilterFactory extends BaseTokenStreamTestCase {
-  public void testKeywords() throws IOException {
+public class TestStemmerOverrideFilterFactory extends BaseTokenStreamFactoryTestCase {
+  public void testKeywords() throws Exception {
     // our stemdict stems dogs to 'cat'
     Reader reader = new StringReader("testing dogs");
-    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
-    args.put("dictionary", "stemdict.txt");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(loader);
-    
-    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
-    assertTokenStreamContents(ts, new String[] { "test", "cat" });
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
+        new StringMockResourceLoader("dogs\tcat"),
+        "dictionary", "stemdict.txt").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
+
+    assertTokenStreamContents(stream, new String[] { "test", "cat" });
   }
   
-  public void testKeywordsCaseInsensitive() throws IOException {
+  public void testKeywordsCaseInsensitive() throws Exception {
     Reader reader = new StringReader("testing DoGs");
-    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
-    Map<String,String> args = new HashMap<String,String>();
-    ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
-    args.put("dictionary", "stemdict.txt");
-    args.put("ignoreCase", "true");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
-    factory.inform(loader);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
+        new StringMockResourceLoader("dogs\tcat"),
+        "dictionary", "stemdict.txt",
+        "ignoreCase", "true").create(stream);
+    stream = tokenFilterFactory("PorterStem").create(stream);
     
-    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
-    assertTokenStreamContents(ts, new String[] { "test", "cat" });
+    assertTokenStreamContents(stream, new String[] { "test", "cat" });
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      tokenFilterFactory("StemmerOverride", "bogusArg", "bogusValue");
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Thu May 30 07:53:18 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.misce
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Collection;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -28,7 +27,13 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  */
@@ -46,30 +51,9 @@ public class TestTrimFilter extends Base
                     new Token(ccc, 0, ccc.length, 11, 15),
                     new Token(whitespace, 0, whitespace.length, 16, 20),
                     new Token(empty, 0, empty.length, 21, 21));
-    ts = new TrimFilter(ts, false);
+    ts = new TrimFilter(TEST_VERSION_CURRENT, ts);
 
     assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
-
-    a = " a".toCharArray();
-    b = "b ".toCharArray();
-    ccc = " c ".toCharArray();
-    whitespace = "   ".toCharArray();
-    ts = new IterTokenStream(
-            new Token(a, 0, a.length, 0, 2),
-            new Token(b, 0, b.length, 0, 2),
-            new Token(ccc, 0, ccc.length, 0, 3),
-            new Token(whitespace, 0, whitespace.length, 0, 3));
-    ts = new TrimFilter(ts, true);
-    
-    assertTokenStreamContents(ts, 
-        new String[] { "a", "b", "c", "" },
-        new int[] { 1, 0, 1, 3 },
-        new int[] { 2, 1, 2, 3 },
-        null,
-        new int[] { 1, 1, 1, 1 },
-        null,
-        null,
-        false);
   }
   
   /**
@@ -91,10 +75,6 @@ public class TestTrimFilter extends Base
       this.tokens = tokens;
     }
     
-    public IterTokenStream(Collection<Token> tokens) {
-      this(tokens.toArray(new Token[tokens.size()]));
-    }
-    
     @Override
     public boolean incrementToken() throws IOException {
       if (index >= tokens.length)
@@ -120,20 +100,10 @@ public class TestTrimFilter extends Base
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer, false));
+        return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer));
       } 
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
-    
-    Analyzer b = new Analyzer() {
-
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer, true));
-      } 
-    };
-    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
   }
   
   public void testEmptyTerm() throws IOException {
@@ -141,7 +111,8 @@ public class TestTrimFilter extends Base
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer tokenizer = new KeywordTokenizer(reader);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer, random().nextBoolean()));
+        final Version version = TEST_VERSION_CURRENT;
+        return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer));
       }
     };
     checkOneTermReuse(a, "", "");