You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC

svn commit: r1487777 [13/50] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...

Modified: lucene/dev/branches/security/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -25,11 +25,10 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 
 import com.ibm.icu.lang.UCharacter;
@@ -75,21 +74,17 @@ import com.ibm.icu.text.RuleBasedBreakIt
  *                rulefiles="Latn:my.Latin.rules.rbbi,Cyrl:my.Cyrillic.rules.rbbi"/>
  *   </analyzer>
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
   static final String RULEFILES = "rulefiles";
-  private Map<Integer,String> tailored;
+  private final Map<Integer,String> tailored;
   private ICUTokenizerConfig config;
   
-  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
-  public ICUTokenizerFactory() {}
-
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
+  /** Creates a new ICUTokenizerFactory */
+  public ICUTokenizerFactory(Map<String,String> args) {
+    super(args);
     tailored = new HashMap<Integer,String>();
-    String rulefilesArg = args.get(RULEFILES);
+    String rulefilesArg = get(args, RULEFILES);
     if (rulefilesArg != null) {
       List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
       for (String scriptAndResourcePath : scriptAndResourcePaths) {
@@ -99,6 +94,9 @@ public class ICUTokenizerFactory extends
         tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), resourcePath);
       }
     }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
 
   @Override
@@ -144,8 +142,8 @@ public class ICUTokenizerFactory extends
   }
 
   @Override
-  public Tokenizer create(Reader input) {
+  public ICUTokenizer create(AttributeFactory factory, Reader input) {
     assert config != null : "inform must be called first!";
-    return new ICUTokenizer(input, config);
+    return new ICUTokenizer(factory, input, config);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,11 +19,11 @@ package org.apache.lucene.analysis.icu;
 
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.HashMap;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
 
 /** basic tests for {@link ICUFoldingFilterFactory} */
 public class TestICUFoldingFilterFactory extends BaseTokenStreamTestCase {
@@ -31,10 +31,21 @@ public class TestICUFoldingFilterFactory
   /** basic tests to ensure the folding is working */
   public void test() throws Exception {
     Reader reader = new StringReader("Résumé");
-    ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-    TokenStream stream = factory.create(tokenizer);
+    ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory(new HashMap<String,String>());
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
     assertTokenStreamContents(stream, new String[] { "resume" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new ICUFoldingFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java Thu May 30 07:53:18 2013
@@ -19,13 +19,11 @@ package org.apache.lucene.analysis.icu;
 
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /** basic tests for {@link ICUNormalizer2FilterFactory} */
 public class TestICUNormalizer2FilterFactory extends BaseTokenStreamTestCase {
@@ -33,14 +31,23 @@ public class TestICUNormalizer2FilterFac
   /** Test nfkc_cf defaults */
   public void testDefaults() throws Exception {
     Reader reader = new StringReader("This is a Test");
-    ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-    TokenStream stream = factory.create(tokenizer);
+    ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory(new HashMap<String,String>());
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
     assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
   }
   
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new ICUNormalizer2FilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
+  
   // TODO: add tests for different forms
 }

Modified: lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java Thu May 30 07:53:18 2013
@@ -23,9 +23,8 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /** basic tests for {@link ICUTransformFilterFactory} */
 public class TestICUTransformFilterFactory extends BaseTokenStreamTestCase {
@@ -33,33 +32,48 @@ public class TestICUTransformFilterFacto
   /** ensure the transform is working */
   public void test() throws Exception {
     Reader reader = new StringReader("簡化字");
-    ICUTransformFilterFactory factory = new ICUTransformFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("id", "Traditional-Simplified");
-    factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-    TokenStream stream = factory.create(tokenizer);
+    ICUTransformFilterFactory factory = new ICUTransformFilterFactory(args);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
     assertTokenStreamContents(stream, new String[] { "简化字" });
   }
   
   /** test forward and reverse direction */
-  public void testDirection() throws Exception {
+  public void testForwardDirection() throws Exception {
     // forward
     Reader reader = new StringReader("Российская Федерация");
-    ICUTransformFilterFactory factory = new ICUTransformFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("id", "Cyrillic-Latin");
-    factory.init(args);
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-    TokenStream stream = factory.create(tokenizer);
+    ICUTransformFilterFactory factory = new ICUTransformFilterFactory(args);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
     assertTokenStreamContents(stream, new String[] { "Rossijskaâ",  "Federaciâ" });
-    
+  }
+  
+  public void testReverseDirection() throws Exception {
     // backward (invokes Latin-Cyrillic)
-    reader = new StringReader("Rossijskaâ Federaciâ");
+    Reader reader = new StringReader("Rossijskaâ Federaciâ");
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("id", "Cyrillic-Latin");
     args.put("direction", "reverse");
-    factory.init(args);
-    tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-    stream = factory.create(tokenizer);
+    ICUTransformFilterFactory factory = new ICUTransformFilterFactory(args);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
     assertTokenStreamContents(stream, new String[] { "Российская", "Федерация" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new ICUTransformFilterFactory(new HashMap<String,String>() {{
+        put("id", "Null");
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -30,8 +30,7 @@ import org.apache.lucene.analysis.util.C
 public class TestICUTokenizerFactory extends BaseTokenStreamTestCase {
   public void testMixedText() throws Exception {
     Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");
-    ICUTokenizerFactory factory = new ICUTokenizerFactory();
-    factory.init(new HashMap<String,String>());
+    ICUTokenizerFactory factory = new ICUTokenizerFactory(new HashMap<String,String>());
     factory.inform(new ClasspathResourceLoader(getClass()));
     TokenStream stream = factory.create(reader);
     assertTokenStreamContents(stream,
@@ -43,10 +42,9 @@ public class TestICUTokenizerFactory ext
     // “ U+201C LEFT DOUBLE QUOTATION MARK; ” U+201D RIGHT DOUBLE QUOTATION MARK
     Reader reader = new StringReader
         ("  Don't,break.at?/(punct)!  \u201Cnice\u201D\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$   ");
-    ICUTokenizerFactory factory = new ICUTokenizerFactory();
     final Map<String,String> args = new HashMap<String,String>();
     args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
-    factory.init(args);
+    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
     factory.inform(new ClasspathResourceLoader(this.getClass()));
     TokenStream stream = factory.create(reader);
     assertTokenStreamContents(stream,
@@ -57,10 +55,9 @@ public class TestICUTokenizerFactory ext
   public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
     Reader reader = new StringReader
         ("One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
-    ICUTokenizerFactory factory = new ICUTokenizerFactory();
     final Map<String,String> args = new HashMap<String,String>();
     args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
-    factory.init(args);
+    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
     factory.inform(new ClasspathResourceLoader(getClass()));
     TokenStream stream = factory.create(reader);
     assertTokenStreamContents(stream,
@@ -77,10 +74,9 @@ public class TestICUTokenizerFactory ext
   public void testKeywordTokenizeCyrillicAndThai() throws Exception {
     Reader reader = new StringReader
         ("Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
-    ICUTokenizerFactory factory = new ICUTokenizerFactory();
     final Map<String,String> args = new HashMap<String,String>();
     args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
-    factory.init(args);
+    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
     factory.inform(new ClasspathResourceLoader(getClass()));
     TokenStream stream = factory.create(reader);
     assertTokenStreamContents(stream, new String[] { "Some", "English",
@@ -88,4 +84,16 @@ public class TestICUTokenizerFactory ext
         "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
         "More", "English" });
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new ICUTokenizerFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java Thu May 30 07:53:18 2013
@@ -89,7 +89,7 @@ public class JapaneseAnalyzer extends St
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
     Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
     TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
-    stream = new JapanesePartOfSpeechStopFilter(true, stream, stoptags);
+    stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
     stream = new CJKWidthFilter(stream);
     stream = new StopFilter(matchVersion, stream, stopwords);
     stream = new JapaneseKatakanaStemFilter(stream);

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java Thu May 30 07:53:18 2013
@@ -22,7 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -32,7 +32,7 @@ import org.apache.lucene.analysis.tokena
  * This acts as a lemmatizer for verbs and adjectives.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>
  */

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.ja;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ja.JapaneseBaseFormFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -34,6 +36,14 @@ import org.apache.lucene.analysis.util.T
  */
 public class JapaneseBaseFormFilterFactory extends TokenFilterFactory {
 
+  /** Creates a new JapaneseBaseFormFilterFactory */
+  public JapaneseBaseFormFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new JapaneseBaseFormFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java Thu May 30 07:53:18 2013
@@ -39,12 +39,20 @@ import java.util.Map;
 public class JapaneseIterationMarkCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
 
   private static final String NORMALIZE_KANJI_PARAM = "normalizeKanji";
-
   private static final String NORMALIZE_KANA_PARAM = "normalizeKana";
 
-  private boolean normalizeKanji = true;
-
-  private boolean normalizeKana = true;
+  private final boolean normalizeKanji;
+  private final boolean normalizeKana;
+  
+  /** Creates a new JapaneseIterationMarkCharFilterFactory */
+  public JapaneseIterationMarkCharFilterFactory(Map<String,String> args) {
+    super(args);
+    normalizeKanji = getBoolean(args, NORMALIZE_KANJI_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT);
+    normalizeKana = getBoolean(args, NORMALIZE_KANA_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
 
   @Override
   public CharFilter create(Reader input) {
@@ -52,13 +60,6 @@ public class JapaneseIterationMarkCharFi
   }
 
   @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    normalizeKanji = getBoolean(NORMALIZE_KANJI_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT);
-    normalizeKana = getBoolean(NORMALIZE_KANA_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
-  }
-
-  @Override
   public AbstractAnalysisFactory getMultiTermComponent() {
     return this;
   }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java Thu May 30 07:53:18 2013
@@ -35,7 +35,7 @@ import java.io.IOException;
  * </p>
  * <p>
  * In order to prevent terms from being stemmed, use an instance of
- * {@link org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter}
+ * {@link org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter}
  * or a custom {@link TokenFilter} that sets the {@link KeywordAttribute}
  * before this {@link TokenStream}.
  * </p>

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -37,15 +37,18 @@ import java.util.Map;
  */
 public class JapaneseKatakanaStemFilterFactory extends TokenFilterFactory {
   private static final String MINIMUM_LENGTH_PARAM = "minimumLength";
-  private int minimumLength;
+  private final int minimumLength;
   
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    minimumLength = getInt(MINIMUM_LENGTH_PARAM, JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH);
+  /** Creates a new JapaneseKatakanaStemFilterFactory */
+  public JapaneseKatakanaStemFilterFactory(Map<String,String> args) {
+    super(args);
+    minimumLength = getInt(args, MINIMUM_LENGTH_PARAM, JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH);
     if (minimumLength < 2) {
       throw new IllegalArgumentException("Illegal " + MINIMUM_LENGTH_PARAM + " " + minimumLength + " (must be 2 or greater)");
     }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
 
   @Override

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java Thu May 30 07:53:18 2013
@@ -22,6 +22,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
 import org.apache.lucene.analysis.util.FilteringTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 /**
  * Removes tokens that match a set of part-of-speech tags.
@@ -30,8 +31,14 @@ public final class JapanesePartOfSpeechS
   private final Set<String> stopTags;
   private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
 
-  public JapanesePartOfSpeechStopFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTags) {
-    super(enablePositionIncrements, input);
+  /**
+   * Create a new {@link JapanesePartOfSpeechStopFilter}.
+   * @param version  the Lucene match version
+   * @param input    the {@link TokenStream} to consume
+   * @param stopTags the part-of-speech tags that should be removed
+   */
+  public JapanesePartOfSpeechStopFilter(Version version, TokenStream input, Set<String> stopTags) {
+    super(version, input);
     this.stopTags = stopTags;
   }
 

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,11 +19,14 @@ package org.apache.lucene.analysis.ja;
 
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
  * Factory for {@link org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter}.
@@ -32,20 +35,26 @@ import org.apache.lucene.analysis.util.*
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.JapaneseTokenizerFactory"/&gt;
  *     &lt;filter class="solr.JapanesePartOfSpeechStopFilterFactory"
- *             tags="stopTags.txt" 
- *             enablePositionIncrements="true"/&gt;
+ *             tags="stopTags.txt"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;
  * </pre>
  */
-public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware  {
-  private boolean enablePositionIncrements;
+public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  private final String stopTagFiles;
   private Set<String> stopTags;
 
+  /** Creates a new JapanesePartOfSpeechStopFilterFactory */
+  public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) {
+    super(args);
+    stopTagFiles = get(args, "tags");
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public void inform(ResourceLoader loader) throws IOException {
-    String stopTagFiles = args.get("tags");
-    enablePositionIncrements = getBoolean("enablePositionIncrements", false);
     stopTags = null;
     CharArraySet cas = getWordSet(loader, stopTagFiles, false);
     if (cas != null) {
@@ -60,6 +69,11 @@ public class JapanesePartOfSpeechStopFil
   @Override
   public TokenStream create(TokenStream stream) {
     // if stoptags is null, it means the file is empty
-    return stopTags == null ? stream : new JapanesePartOfSpeechStopFilter(enablePositionIncrements, stream, stopTags);
+    if (stopTags != null) {
+      final TokenStream filter = new JapanesePartOfSpeechStopFilter(luceneMatchVersion, stream, stopTags);
+      return filter;
+    } else {
+      return stream;
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java Thu May 30 07:53:18 2013
@@ -37,12 +37,15 @@ import java.util.Map;
  */
 public class JapaneseReadingFormFilterFactory extends TokenFilterFactory {
   private static final String ROMAJI_PARAM = "useRomaji";
-  private boolean useRomaji;
+  private final boolean useRomaji;
   
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    useRomaji = getBoolean(ROMAJI_PARAM, false);
+  /** Creates a new JapaneseReadingFormFilterFactory */
+  public JapaneseReadingFormFilterFactory(Map<String,String> args) {
+    super(args);
+    useRomaji = getBoolean(args, ROMAJI_PARAM, false);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
 
   @Override

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu May 30 07:53:18 2013
@@ -187,6 +187,8 @@ public final class JapaneseTokenizer ext
 
   /**
    * Create a new JapaneseTokenizer.
+   * <p>
+   * Uses the default AttributeFactory.
    * 
    * @param input Reader containing text
    * @param userDictionary Optional: if non-null, user dictionary.
@@ -194,7 +196,21 @@ public final class JapaneseTokenizer ext
    * @param mode tokenization mode.
    */
   public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
-    super(input);
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, userDictionary, discardPunctuation, mode);
+  }
+
+  /**
+   * Create a new JapaneseTokenizer.
+   *
+   * @param factory the AttributeFactory to use
+   * @param input Reader containing text
+   * @param userDictionary Optional: if non-null, user dictionary.
+   * @param discardPunctuation true if punctuation tokens should be dropped from the output.
+   * @param mode tokenization mode.
+   */
+  public JapaneseTokenizer
+      (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
+    super(factory, input);
     dictionary = TokenInfoDictionary.getInstance();
     fst = dictionary.getFST();
     unkDictionary = UnknownDictionary.getInstance();

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -27,11 +27,10 @@ import java.nio.charset.CodingErrorActio
 import java.util.Locale;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ja.JapaneseTokenizer;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
 import org.apache.lucene.analysis.ja.dict.UserDictionary;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
@@ -63,17 +62,28 @@ public class JapaneseTokenizerFactory ex
 
   private UserDictionary userDictionary;
 
-  private Mode mode;
-
-  private boolean discardPunctuation;
-
+  private final Mode mode;
+  private final boolean discardPunctuation;  
+  private final String userDictionaryPath;
+  private final String userDictionaryEncoding;
+
+  /** Creates a new JapaneseTokenizerFactory */
+  public JapaneseTokenizerFactory(Map<String,String> args) {
+    super(args);
+    mode = Mode.valueOf(get(args, MODE, JapaneseTokenizer.DEFAULT_MODE.toString()).toUpperCase(Locale.ROOT));
+    userDictionaryPath = args.remove(USER_DICT_PATH);
+    userDictionaryEncoding = args.remove(USER_DICT_ENCODING);
+    discardPunctuation = getBoolean(args, DISCARD_PUNCTUATION, true);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public void inform(ResourceLoader loader) throws IOException {
-    mode = getMode(args);
-    String userDictionaryPath = args.get(USER_DICT_PATH);
     if (userDictionaryPath != null) {
       InputStream stream = loader.openResource(userDictionaryPath);
-      String encoding = args.get(USER_DICT_ENCODING);
+      String encoding = userDictionaryEncoding;
       if (encoding == null) {
         encoding = IOUtils.UTF_8;
       }
@@ -85,20 +95,10 @@ public class JapaneseTokenizerFactory ex
     } else {
       userDictionary = null;
     }
-    discardPunctuation = getBoolean(DISCARD_PUNCTUATION, true);
   }
   
   @Override
-  public Tokenizer create(Reader input) {
-    return new JapaneseTokenizer(input, userDictionary, discardPunctuation, mode);
-  }
-  
-  private Mode getMode(Map<String, String> args) {
-    String mode = args.get(MODE);
-    if (mode != null) {
-      return Mode.valueOf(mode.toUpperCase(Locale.ROOT));
-    } else {
-      return JapaneseTokenizer.DEFAULT_MODE;
-    }
+  public JapaneseTokenizer create(AttributeFactory factory, Reader input) {
+    return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java Thu May 30 07:53:18 2013
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.ja;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
 
 import org.apache.lucene.analysis.util.ResourceLoader;
 
@@ -34,12 +32,21 @@ class StringMockResourceLoader implement
   }
 
   @Override
+  public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+    try {
+      return Class.forName(cname).asSubclass(expectedType);
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot load class: " + cname, e);
+    }
+  }
+
+  @Override
   public <T> T newInstance(String cname, Class<T> expectedType) {
+    Class<? extends T> clazz = findClass(cname, expectedType);
     try {
-      Class<? extends T> clazz = Class.forName(cname).asSubclass(expectedType);
       return clazz.newInstance();
     } catch (Exception e) {
-      throw new RuntimeException(e);
+      throw new RuntimeException("Cannot create instance: " + cname, e);
     }
   }
 

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java Thu May 30 07:53:18 2013
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.BaseTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
@@ -49,7 +49,7 @@ public class TestJapaneseBaseFormFilter 
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
       }
     };

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,8 +19,7 @@ package org.apache.lucene.analysis.ja;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
@@ -30,16 +29,25 @@ import org.apache.lucene.analysis.TokenS
  */
 public class TestJapaneseBaseFormFilterFactory extends BaseTokenStreamTestCase {
   public void testBasics() throws IOException {
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    tokenizerFactory.init(args);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
     TokenStream ts = tokenizerFactory.create(new StringReader("それはまだ実験段階にあります"));
-    JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory();
+    JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory(new HashMap<String,String>());
     ts = factory.create(ts);
     assertTokenStreamContents(ts,
         new String[] { "それ", "は", "まだ", "実験", "段階", "に", "ある", "ます"  }
     );
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new JapaneseBaseFormFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java Thu May 30 07:53:18 2013
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.TokenS
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -35,22 +34,17 @@ public class TestJapaneseIterationMarkCh
 
   public void testIterationMarksWithKeywordTokenizer() throws IOException {
     final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
-    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
+    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
     CharFilter filter = filterFactory.create(new StringReader(text));
     TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
     assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
   }
 
   public void testIterationMarksWithJapaneseTokenizer() throws IOException {
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    Map<String, String> tokenizerArgs = Collections.emptyMap();
-    tokenizerFactory.init(tokenizerArgs);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
 
-    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
-    Map<String, String> filterArgs = Collections.emptyMap();
-    filterFactory.init(filterArgs);
-
+    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
     CharFilter filter = filterFactory.create(
         new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
     );
@@ -59,16 +53,13 @@ public class TestJapaneseIterationMarkCh
   }
 
   public void testKanjiOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    Map<String, String> tokenizerArgs = Collections.emptyMap();
-    tokenizerFactory.init(tokenizerArgs);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
 
-    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
     Map<String, String> filterArgs = new HashMap<String, String>();
     filterArgs.put("normalizeKanji", "true");
     filterArgs.put("normalizeKana", "false");
-    filterFactory.init(filterArgs);
+    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);
     
     CharFilter filter = filterFactory.create(
         new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
@@ -78,16 +69,13 @@ public class TestJapaneseIterationMarkCh
   }
 
   public void testKanaOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    Map<String, String> tokenizerArgs = Collections.emptyMap();
-    tokenizerFactory.init(tokenizerArgs);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
 
-    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
     Map<String, String> filterArgs = new HashMap<String, String>();
     filterArgs.put("normalizeKanji", "false");
     filterArgs.put("normalizeKana", "true");
-    filterFactory.init(filterArgs);
+    JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);
 
     CharFilter filter = filterFactory.create(
         new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
@@ -95,4 +83,16 @@ public class TestJapaneseIterationMarkCh
     TokenStream tokenStream = tokenizerFactory.create(filter);
     assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java Thu May 30 07:53:18 2013
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import java.io.IOException;
@@ -70,7 +70,7 @@ public class TestJapaneseKatakanaStemFil
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
       }
     };

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -22,27 +22,34 @@ import org.apache.lucene.analysis.TokenS
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
 
 /**
  * Simple tests for {@link JapaneseKatakanaStemFilterFactory}
  */
 public class TestJapaneseKatakanaStemFilterFactory extends BaseTokenStreamTestCase {
   public void testKatakanaStemming() throws IOException {
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    Map<String, String> tokenizerArgs = Collections.emptyMap();
-    tokenizerFactory.init(tokenizerArgs);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
     TokenStream tokenStream = tokenizerFactory.create(
         new StringReader("明後日パーティーに行く予定がある。図書館で資料をコピーしました。")
     );
-    JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory();
-    Map<String, String> filterArgs = Collections.emptyMap();
-    filterFactory.init(filterArgs);
+    JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory(new HashMap<String,String>());;
     assertTokenStreamContents(filterFactory.create(tokenStream),
         new String[]{ "明後日", "パーティ", "に", "行く", "予定", "が", "ある",   // パーティー should be stemmed
                       "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"} // コピー should not be stemmed
     );
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new JapaneseKatakanaStemFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java Thu May 30 07:53:18 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ja;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -35,21 +34,30 @@ public class TestJapanesePartOfSpeechSto
         "#  verb-main:\n" +
         "動詞-自立\n";
     
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> tokenizerArgs = Collections.emptyMap();
-    tokenizerFactory.init(tokenizerArgs);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
     TokenStream ts = tokenizerFactory.create(new StringReader("私は制限スピードを超える。"));
-    JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
+    args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
     args.put("tags", "stoptags.txt");
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(args);
+    JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory(args);
     factory.inform(new StringMockResourceLoader(tags));
     ts = factory.create(ts);
     assertTokenStreamContents(ts,
         new String[] { "私", "は", "制限", "スピード", "を" }
     );
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new JapanesePartOfSpeechStopFilterFactory(new HashMap<String,String>() {{
+        put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java Thu May 30 07:53:18 2013
@@ -22,22 +22,31 @@ import org.apache.lucene.analysis.TokenS
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
 
 /**
  * Simple tests for {@link JapaneseReadingFormFilterFactory}
  */
 public class TestJapaneseReadingFormFilterFactory extends BaseTokenStreamTestCase {
   public void testReadings() throws IOException {
-    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    Map<String, String> args = Collections.emptyMap();
-    tokenizerFactory.init(args);
+    JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     tokenizerFactory.inform(new StringMockResourceLoader(""));
     TokenStream tokenStream = tokenizerFactory.create(new StringReader("先ほどベルリンから来ました。"));
-    JapaneseReadingFormFilterFactory filterFactory = new JapaneseReadingFormFilterFactory();
+    JapaneseReadingFormFilterFactory filterFactory = new JapaneseReadingFormFilterFactory(new HashMap<String,String>());
     assertTokenStreamContents(filterFactory.create(tokenStream),
         new String[] { "サキ", "ホド", "ベルリン", "カラ", "キ", "マシ", "タ" }
     );
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new JapaneseReadingFormFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenS
  */
 public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase {
   public void testSimple() throws IOException {
-    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
+    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     factory.inform(new StringMockResourceLoader(""));
     TokenStream ts = factory.create(new StringReader("これは本ではない"));
     assertTokenStreamContents(ts,
@@ -48,10 +45,7 @@ public class TestJapaneseTokenizerFactor
    * Test that search mode is enabled and working by default
    */
   public void testDefaults() throws IOException {
-    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    Map<String, String> args = Collections.emptyMap();
-    factory.init(args);
+    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String,String>());
     factory.inform(new StringMockResourceLoader(""));
     TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
     assertTokenStreamContents(ts,
@@ -63,10 +57,9 @@ public class TestJapaneseTokenizerFactor
    * Test mode parameter: specifying normal mode
    */
   public void testMode() throws IOException {
-    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("mode", "normal");
-    factory.init(args);
+    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
     TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
     assertTokenStreamContents(ts,
@@ -84,10 +77,9 @@ public class TestJapaneseTokenizerFactor
         "関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞\n" +
         "# Custom reading for sumo wrestler\n" +
         "朝青龍,朝青龍,アサショウリュウ,カスタム人名\n";
-    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("userDictionary", "userdict.txt");
-    factory.init(args);
+    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(userDict));
     TokenStream ts = factory.create(new StringReader("関西国際空港に行った"));
     assertTokenStreamContents(ts,
@@ -99,15 +91,13 @@ public class TestJapaneseTokenizerFactor
    * Test preserving punctuation
    */
   public void testPreservePunctuation() throws IOException {
-    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
     Map<String,String> args = new HashMap<String,String>();
     args.put("discardPunctuation", "false");
-    factory.init(args);
+    JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
     factory.inform(new StringMockResourceLoader(""));
     TokenStream ts = factory.create(
         new StringReader("今ノルウェーにいますが、来週の頭日本に戻ります。楽しみにしています!お寿司が食べたいな。。。")
     );
-    System.out.println(ts.toString());
     assertTokenStreamContents(ts,
         new String[] { "今", "ノルウェー", "に", "い", "ます", "が", "、",
             "来週", "の", "頭", "日本", "に", "戻り", "ます", "。",
@@ -115,4 +105,16 @@ public class TestJapaneseTokenizerFactor
             "お", "寿司", "が", "食べ", "たい", "な", "。", "。", "。"}
     );
   }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new JapaneseTokenizerFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java Thu May 30 07:53:18 2013
@@ -24,8 +24,6 @@ import java.util.Map;
 import morfologik.stemming.PolishStemmer.DICTIONARY;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.morfologik.MorfologikFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
@@ -52,24 +50,10 @@ public class MorfologikFilterFactory ext
   /** Schema attribute. */
   public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
   
-  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
-  public MorfologikFilterFactory() {}
-
-  /**
-   * {@inheritDoc}
-   */
-  @Override
-  public TokenStream create(TokenStream ts) {
-    return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
-    String dictionaryName = args.get(DICTIONARY_SCHEMA_ATTRIBUTE);
+  /** Creates a new MorfologikFilterFactory */
+  public MorfologikFilterFactory(Map<String,String> args) {
+    super(args);
+    String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
     if (dictionaryName != null && !dictionaryName.isEmpty()) {
       try {
         DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
@@ -81,5 +65,13 @@ public class MorfologikFilterFactory ext
             + dictionaryName);
       }
     }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public TokenStream create(TokenStream ts) {
+    return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Thu May 30 07:53:18 2013
@@ -52,6 +52,14 @@ public class TestMorfologikAnalyzer exte
       new int[] { 0, 0, 0, 0, 7, 7, 7, 7 },
       new int[] { 6, 6, 6, 6, 13, 13, 13, 13 },
       new int[] { 1, 0, 0, 0, 1, 0, 0, 0 });
+
+    assertAnalyzesToReuse(
+        a,
+        "T. Gl\u00FCcksberg",
+        new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
+        new int[] { 0, 0, 0, 3  },
+        new int[] { 1, 1, 1, 13 },
+        new int[] { 1, 0, 0, 1  });
   }
 
   /** Test reuse of MorfologikFilter with leftover stems. */

Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Thu May 30 07:53:18 2013
@@ -22,8 +22,8 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Test for {@link MorfologikFilterFactory}.
@@ -34,11 +34,21 @@ public class TestMorfologikFilterFactory
     Map<String,String> initParams = new HashMap<String,String>();
     initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
         "morfologik");
-    MorfologikFilterFactory factory = new MorfologikFilterFactory();
-    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
-    factory.init(initParams);
-    TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT,
-        reader));
-    assertTokenStreamContents(ts, new String[] {"rower", "bilet"});
+    MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
+    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+    stream = factory.create(stream);
+    assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    try {
+      new MorfologikFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("Unknown parameters"));
+    }
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Thu May 30 07:53:18 2013
@@ -27,7 +27,6 @@ import org.apache.commons.codec.language
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 /**
@@ -48,13 +47,11 @@ public final class BeiderMorseFilter ext
   private final Matcher matcher = pattern.matcher("");
   // encoded representation
   private String encoded;
-  // offsets for any buffered outputs
-  private int startOffset;
-  private int endOffset;
+  // preserves all attributes for any buffered outputs
+  private State state;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   
   
   /**
@@ -83,10 +80,10 @@ public final class BeiderMorseFilter ext
   @Override
   public boolean incrementToken() throws IOException {
     if (matcher.find()) {
-      clearAttributes();
+      assert state != null && encoded != null;
+      restoreState(state);
       termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));
       posIncAtt.setPositionIncrement(0);
-      offsetAtt.setOffset(startOffset, endOffset);
       return true;
     }
     
@@ -94,8 +91,7 @@ public final class BeiderMorseFilter ext
       encoded = (languages == null) 
           ? engine.encode(termAtt.toString())
           : engine.encode(termAtt.toString(), languages);
-      startOffset = offsetAtt.startOffset();
-      endOffset = offsetAtt.endOffset();
+      state = captureState();
       matcher.reset(encoded);
       if (matcher.find()) {
         termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,22 +17,19 @@ package org.apache.lucene.analysis.phone
  * limitations under the License.
  */
 
-import java.util.Arrays;
-import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.codec.language.bm.Languages.LanguageSet;
 import org.apache.commons.codec.language.bm.NameType;
 import org.apache.commons.codec.language.bm.PhoneticEngine;
 import org.apache.commons.codec.language.bm.RuleType;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
  * Factory for {@link BeiderMorseFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_bm" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -42,36 +39,27 @@ import org.apache.lucene.analysis.util.T
  *     &lt;/filter&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class BeiderMorseFilterFactory extends TokenFilterFactory {
-  private PhoneticEngine engine;
-  private LanguageSet languageSet;
+  private final PhoneticEngine engine;
+  private final LanguageSet languageSet;
   
-  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
-  public BeiderMorseFilterFactory() {}
-  
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
-    
+  /** Creates a new BeiderMorseFilterFactory */
+  public BeiderMorseFilterFactory(Map<String,String> args) {
+    super(args);
     // PhoneticEngine = NameType + RuleType + concat
     // we use common-codec's defaults: GENERIC + APPROX + true
-    String nameTypeArg = args.get("nameType");
-    NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg);
-
-    String ruleTypeArg = args.get("ruleType");
-    RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);
+    NameType nameType = NameType.valueOf(get(args, "nameType", NameType.GENERIC.toString()));
+    RuleType ruleType = RuleType.valueOf(get(args, "ruleType", RuleType.APPROX.toString()));
     
-    boolean concat = getBoolean("concat", true);
+    boolean concat = getBoolean(args, "concat", true);
     engine = new PhoneticEngine(nameType, ruleType, concat);
     
     // LanguageSet: defaults to automagic, otherwise a comma-separated list.
-    String languageSetArg = args.get("languageSet");
-    if (languageSetArg == null || languageSetArg.equals("auto")) {
-      languageSet = null;
-    } else {
-      languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(","))));
+    Set<String> langs = getSet(args, "languageSet");
+    languageSet = (null == langs || (1 == langs.size() && langs.contains("auto"))) ? null : LanguageSet.from(langs);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
     }
   }
 

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java Thu May 30 07:53:18 2013
@@ -21,19 +21,17 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
  * Factory for {@link DoubleMetaphoneFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
  *     &lt;filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class DoubleMetaphoneFilterFactory extends TokenFilterFactory
 {
@@ -44,20 +42,16 @@ public class DoubleMetaphoneFilterFactor
   /** default maxCodeLength if not specified */
   public static final int DEFAULT_MAX_CODE_LENGTH = 4;
 
-  private boolean inject = true;
-  private int maxCodeLength = DEFAULT_MAX_CODE_LENGTH;
+  private final boolean inject;
+  private final int maxCodeLength;
 
-  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
-  public DoubleMetaphoneFilterFactory() {}
-
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-
-    inject = getBoolean(INJECT, true);
-
-    if (args.get(MAX_CODE_LENGTH) != null) {
-      maxCodeLength = Integer.parseInt(args.get(MAX_CODE_LENGTH));
+  /** Creates a new DoubleMetaphoneFilterFactory */
+  public DoubleMetaphoneFilterFactory(Map<String,String> args) {
+    super(args);
+    inject = getBoolean(args, INJECT, true);
+    maxCodeLength = getInt(args, MAX_CODE_LENGTH, DEFAULT_MAX_CODE_LENGTH);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
     }
   }
 

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java Thu May 30 07:53:18 2013
@@ -25,9 +25,13 @@ import java.util.Locale;
 import java.util.Map;
 
 import org.apache.commons.codec.Encoder;
-import org.apache.commons.codec.language.*;
+import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.ColognePhonetic;
+import org.apache.commons.codec.language.DoubleMetaphone;
+import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.RefinedSoundex;
+import org.apache.commons.codec.language.Soundex;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -49,7 +53,7 @@ import org.apache.lucene.analysis.util.T
  *  support this then specifying this is an error.</dd>
  * </dl>
  *
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_phonetic" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@@ -59,9 +63,7 @@ import org.apache.lucene.analysis.util.T
  * 
  * @see PhoneticFilter
  */
-public class PhoneticFilterFactory extends TokenFilterFactory
-  implements ResourceLoaderAware
-{
+public class PhoneticFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
   /** parameter name: either a short name or a full class name */
   public static final String ENCODER = "encoder";
   /** parameter name: true if encoded tokens should be added as synonyms */
@@ -82,33 +84,36 @@ public class PhoneticFilterFactory exten
     registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class);
   }
 
-  boolean inject = true; //accessed by the test
-  private String name = null;
+  final boolean inject; //accessed by the test
+  private final String name;
+  private final Integer maxCodeLength;
   private Class<? extends Encoder> clazz = null;
   private Method setMaxCodeLenMethod = null;
-  private Integer maxCodeLength = null;
   
-  /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
-  public PhoneticFilterFactory() {}
+  /** Creates a new PhoneticFilterFactory */
+  public PhoneticFilterFactory(Map<String,String> args) {
+    super(args);
+    inject = getBoolean(args, INJECT, true);
+    name = require(args, ENCODER);
+    String v = get(args, MAX_CODE_LENGTH);
+    if (v != null) {
+      maxCodeLength = Integer.valueOf(v);
+    } else {
+      maxCodeLength = null;
+    }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
 
   @Override
   public void inform(ResourceLoader loader) throws IOException {
-
-    inject = getBoolean(INJECT, true);
-
-    String name = args.get( ENCODER );
-    if( name == null ) {
-      throw new IllegalArgumentException("Missing required parameter: " + ENCODER
-          + " [" + registry.keySet() + "]");
-    }
     clazz = registry.get(name.toUpperCase(Locale.ROOT));
     if( clazz == null ) {
       clazz = resolveEncoder(name, loader);
     }
 
-    String v = args.get(MAX_CODE_LENGTH);
-    if (v != null) {
-      maxCodeLength = Integer.valueOf(v);
+    if (maxCodeLength != null) {
       try {
         setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
       } catch (Exception e) {

Modified: lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Thu May 30 07:53:18 2013
@@ -19,7 +19,9 @@ package org.apache.lucene.analysis.phone
 
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashSet;
+import java.util.regex.Pattern;
 
 import org.apache.commons.codec.language.bm.NameType;
 import org.apache.commons.codec.language.bm.PhoneticEngine;
@@ -29,7 +31,10 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.junit.Ignore;
 
 /** Tests {@link BeiderMorseFilter} */
@@ -103,4 +108,20 @@ public class TestBeiderMorseFilter exten
     };
     checkOneTermReuse(a, "", "");
   }
+  
+  public void testCustomAttribute() throws IOException {
+    TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));
+    stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
+    stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
+    KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
+    stream.reset();
+    int i = 0;
+    while(stream.incrementToken()) {
+      assertTrue(keyAtt.isKeyword());
+      i++;
+    }
+    assertEquals(12, i);
+    stream.end();
+    stream.close();
+  }
 }