You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC

svn commit: r1487777 [7/50] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/mav...

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
  * Portuguese words.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>
  */

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
  * Factory for {@link PortugueseLightStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
  *     &lt;filter class="solr.PortugueseLightStemFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class PortugueseLightStemFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new PortugueseLightStemFilterFactory */
+  public PortugueseLightStemFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new PortugueseLightStemFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
  * Portuguese words.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>
  */

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
  * Factory for {@link PortugueseMinimalStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
  *     &lt;filter class="solr.PortugueseMinimalStemFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class PortugueseMinimalStemFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new PortugueseMinimalStemFilterFactory */
+  public PortugueseMinimalStemFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new PortugueseMinimalStemFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
  * Portuguese words.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>
  */

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pt.PortugueseStemFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
  * Factory for {@link PortugueseStemFilter}. 
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
  *     &lt;filter class="solr.PortugueseStemFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class PortugueseStemFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new PortugueseStemFilterFactory */
+  public PortugueseStemFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new PortugueseStemFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.rever
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.reverse.ReverseStringFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
  * Factory for {@link ReverseStringFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@@ -31,13 +33,21 @@ import org.apache.lucene.analysis.util.T
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
  *
- *
  * @since solr 1.4
  */
 public class ReverseStringFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new ReverseStringFilterFactory */
+  public ReverseStringFilterFactory(Map<String,String> args) {
+    super(args);
+    assureMatchVersion();
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public ReverseStringFilter create(TokenStream in) {
-    assureMatchVersion();
     return new ReverseStringFilter(luceneMatchVersion,in);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java Thu May 30 07:53:18 2013
@@ -23,7 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -94,7 +94,7 @@ public final class RomanianAnalyzer exte
 
   /**
    * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
-   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    * 
    * @param matchVersion lucene compatibility version
@@ -116,7 +116,7 @@ public final class RomanianAnalyzer exte
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
-   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
@@ -127,7 +127,7 @@ public final class RomanianAnalyzer exte
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
-      result = new KeywordMarkerFilter(result, stemExclusionSet);
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new SnowballFilter(result, new RomanianStemmer());
     return new TokenStreamComponents(source, result);
   }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Thu May 30 07:53:18 2013
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.util.S
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -111,7 +111,7 @@ public final class RussianAnalyzer exten
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
-   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, and {@link SnowballFilter}
    */
     @Override
@@ -122,7 +122,7 @@ public final class RussianAnalyzer exten
       result = new LowerCaseFilter(matchVersion, result);
       result = new StopFilter(matchVersion, result, stopwords);
       if (!stemExclusionSet.isEmpty()) 
-        result = new KeywordMarkerFilter(result, stemExclusionSet);
+        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
       result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
       return new TokenStreamComponents(source, result);
     }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
  * words.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>
  */

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.ru;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ru.RussianLightStemFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
  * Factory for {@link RussianLightStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
  *     &lt;filter class="solr.RussianLightStemFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class RussianLightStemFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new RussianLightStemFilterFactory */
+  public RussianLightStemFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new RussianLightStemFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.shing
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
@@ -25,7 +24,7 @@ import java.util.Map;
 
 /** 
  * Factory for {@link ShingleFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@@ -33,41 +32,37 @@ import java.util.Map;
  *             outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class ShingleFilterFactory extends TokenFilterFactory {
-  private int minShingleSize;
-  private int maxShingleSize;
-  private boolean outputUnigrams;
-  private boolean outputUnigramsIfNoShingles;
-  private String tokenSeparator;
+  private final int minShingleSize;
+  private final int maxShingleSize;
+  private final boolean outputUnigrams;
+  private final boolean outputUnigramsIfNoShingles;
+  private final String tokenSeparator;
 
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    maxShingleSize = getInt("maxShingleSize", 
-                            ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+  /** Creates a new ShingleFilterFactory */
+  public ShingleFilterFactory(Map<String, String> args) {
+    super(args);
+    maxShingleSize = getInt(args, "maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
     if (maxShingleSize < 2) {
-      throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize
-                              + ") - must be at least 2");
+      throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize + ") - must be at least 2");
     }
-    minShingleSize = getInt("minShingleSize",
-                            ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
+    minShingleSize = getInt(args, "minShingleSize", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
     if (minShingleSize < 2) {
-      throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
-                              + ") - must be at least 2");
+      throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be at least 2");
     }
     if (minShingleSize > maxShingleSize) {
-      throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
-                              + ") - must be no greater than maxShingleSize ("
-                              + maxShingleSize + ")");
+      throw new IllegalArgumentException
+          ("Invalid minShingleSize (" + minShingleSize + ") - must be no greater than maxShingleSize (" + maxShingleSize + ")");
+    }
+    outputUnigrams = getBoolean(args, "outputUnigrams", true);
+    outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
+    tokenSeparator = get(args, "tokenSeparator", ShingleFilter.TOKEN_SEPARATOR);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
     }
-    outputUnigrams = getBoolean("outputUnigrams", true);
-    outputUnigramsIfNoShingles = getBoolean("outputUnigramsIfNoShingles", false);
-    tokenSeparator = args.containsKey("tokenSeparator")
-                     ? args.get("tokenSeparator")
-                     : ShingleFilter.TOKEN_SEPARATOR;
   }
+
   @Override
   public ShingleFilter create(TokenStream input) {
     ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Thu May 30 07:53:18 2013
@@ -37,6 +37,18 @@ import org.tartarus.snowball.SnowballPro
  *  <li>For other languages, see {@link LowerCaseFilter}.
  * </ul>
  * </p>
+ *
+ * <p>
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer,
+ * {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
+ * in a previous {@link TokenStream}.
+ *
+ * Note: To include the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}.
+ * </p>
+ *
+ *
  */
 public final class SnowballFilter extends TokenFilter {
 

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,20 @@ package org.apache.lucene.analysis.snowb
 import java.util.Map;
 import java.io.IOException;
 
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.tartarus.snowball.SnowballProgram;
 
 /**
  * Factory for {@link SnowballFilter}, with configurable language
  * <p>
  * Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -39,26 +41,30 @@ import org.tartarus.snowball.SnowballPro
  *     &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- * 
- *
  */
 public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
   public static final String PROTECTED_TOKENS = "protected";
 
-  private String language = "English";
+  private final String language;
+  private final String wordFiles;
   private Class<? extends SnowballProgram> stemClass;
   private CharArraySet protectedWords = null;
+  
+  /** Creates a new SnowballPorterFilterFactory */
+  public SnowballPorterFilterFactory(Map<String,String> args) {
+    super(args);
+    language = get(args, "language", "English");
+    wordFiles = get(args, PROTECTED_TOKENS);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
 
   @Override
   public void inform(ResourceLoader loader) throws IOException {
-    String cfgLanguage = args.get("language");
-    if (cfgLanguage != null)
-      language = cfgLanguage;
-
     String className = "org.tartarus.snowball.ext." + language + "Stemmer";
     stemClass = loader.newInstance(className, SnowballProgram.class).getClass();
 
-    String wordFiles = args.get(PROTECTED_TOKENS);
     if (wordFiles != null) {
       protectedWords = getWordSet(loader, wordFiles, false);
     }
@@ -74,7 +80,7 @@ public class SnowballPorterFilterFactory
     }
 
     if (protectedWords != null)
-      input = new KeywordMarkerFilter(input, protectedWords);
+      input = new SetKeywordMarkerFilter(input, protectedWords);
     return new SnowballFilter(input, program);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.stand
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -24,17 +26,24 @@ import org.apache.lucene.analysis.standa
 
 /**
  * Factory for {@link ClassicFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
  *     &lt;filter class="solr.ClassicFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
- *
  */
 public class ClassicFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new ClassicFilterFactory */
+  public ClassicFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenFilter create(TokenStream input) {
     return new ClassicFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Thu May 30 07:53:18 2013
@@ -104,14 +104,6 @@ public final class ClassicTokenizer exte
   }
 
   /**
-   * Creates a new ClassicTokenizer with a given {@link AttributeSource}. 
-   */
-  public ClassicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source, input);
-    init(matchVersion);
-  }
-
-  /**
    * Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory} 
    */
   public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,41 +17,37 @@ package org.apache.lucene.analysis.stand
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.ClassicTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 import java.io.Reader;
 import java.util.Map;
 
 /**
  * Factory for {@link ClassicTokenizer}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
- *
  */
-
 public class ClassicTokenizerFactory extends TokenizerFactory {
+  private final int maxTokenLength;
 
-  private int maxTokenLength;
-
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
+  /** Creates a new ClassicTokenizerFactory */
+  public ClassicTokenizerFactory(Map<String,String> args) {
+    super(args);
     assureMatchVersion();
-    maxTokenLength = getInt("maxTokenLength", 
-                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
 
   @Override
-  public Tokenizer create(Reader input) {
-    ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, input); 
+  public ClassicTokenizer create(AttributeFactory factory, Reader input) {
+    ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input); 
     tokenizer.setMaxTokenLength(maxTokenLength);
     return tokenizer;
   }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java Thu May 30 07:53:18 2013
@@ -25,20 +25,23 @@ import org.apache.lucene.analysis.util.T
 
 /**
  * Factory for {@link StandardFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
  *     &lt;filter class="solr.StandardFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class StandardFilterFactory extends TokenFilterFactory {
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
+  
+  /** Creates a new StandardFilterFactory */
+  public StandardFilterFactory(Map<String,String> args) {
+    super(args);
     assureMatchVersion();
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
   
   @Override

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu May 30 07:53:18 2013
@@ -118,14 +118,6 @@ public final class StandardTokenizer ext
   }
 
   /**
-   * Creates a new StandardTokenizer with a given {@link AttributeSource}. 
-   */
-  public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source, input);
-    init(matchVersion);
-  }
-
-  /**
    * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory} 
    */
   public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,40 +17,37 @@ package org.apache.lucene.analysis.stand
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 import java.io.Reader;
 import java.util.Map;
 
 /**
  * Factory for {@link StandardTokenizer}. 
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre> 
- *
  */
-
 public class StandardTokenizerFactory extends TokenizerFactory {
+  private final int maxTokenLength;
   
-  private int maxTokenLength;
-  
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
+  /** Creates a new StandardTokenizerFactory */
+  public StandardTokenizerFactory(Map<String,String> args) {
+    super(args);
     assureMatchVersion();
-    maxTokenLength = getInt("maxTokenLength", 
-                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
 
   @Override
-  public StandardTokenizer create(Reader input) {
-    StandardTokenizer tokenizer
-      = new StandardTokenizer(luceneMatchVersion, input); 
+  public StandardTokenizer create(AttributeFactory factory, Reader input) {
+    StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input); 
     tokenizer.setMaxTokenLength(maxTokenLength);
     return tokenizer;
   }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Thu May 30 07:53:18 2013
@@ -102,14 +102,6 @@ public final class UAX29URLEmailTokenize
   }
 
   /**
-   * Creates a new UAX29URLEmailTokenizer with a given {@link AttributeSource}. 
-   */
-  public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source, input);
-    this.scanner = getScannerFor(matchVersion);
-  }
-
-  /**
    * Creates a new UAX29URLEmailTokenizer with a given {@link AttributeFactory} 
    */
   public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,40 +17,37 @@ package org.apache.lucene.analysis.stand
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 import java.io.Reader;
 import java.util.Map;
 
 /**
  * Factory for {@link UAX29URLEmailTokenizer}. 
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre> 
- *
- * 
  */
-
 public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
+  private final int maxTokenLength;
 
-  private int maxTokenLength;
-
-  @Override
-  public void init(Map<String,String> args) {
-    super.init(args);
+  /** Creates a new UAX29URLEmailTokenizerFactory */
+  public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
+    super(args);
     assureMatchVersion();
-    maxTokenLength = getInt("maxTokenLength",
-                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
   }
 
   @Override
-  public UAX29URLEmailTokenizer create(Reader input) {
-    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input); 
+  public UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input) {
+    UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input); 
     tokenizer.setMaxTokenLength(maxTokenLength);
     return tokenizer;
   }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java Thu May 30 07:53:18 2013
@@ -23,7 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -91,7 +91,7 @@ public final class SwedishAnalyzer exten
 
   /**
    * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
-   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    * 
    * @param matchVersion lucene compatibility version
@@ -113,7 +113,7 @@ public final class SwedishAnalyzer exten
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
-   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
@@ -124,7 +124,7 @@ public final class SwedishAnalyzer exten
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
-      result = new KeywordMarkerFilter(result, stemExclusionSet);
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new SnowballFilter(result, new SwedishStemmer());
     return new TokenStreamComponents(source, result);
   }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
  * words.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>
  */

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.sv;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /** 
  * Factory for {@link SwedishLightStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
  *     &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class SwedishLightStemFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new SwedishLightStemFilterFactory */
+  public SwedishLightStemFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new SwedishLightStemFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java Thu May 30 07:53:18 2013
@@ -25,35 +25,74 @@ import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.text.ParseException;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.synonym.SynonymFilter;
-import org.apache.lucene.analysis.synonym.SynonymMap;
-import org.apache.lucene.analysis.synonym.SolrSynonymParser;
-import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.util.Version;
 
 /**
  * Factory for {@link SynonymFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
  *     &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
  *             format="solr" ignoreCase="false" expand="true" 
- *             tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
+ *             tokenizerFactory="solr.WhitespaceTokenizerFactory"
+ *             [optional tokenizer factory parameters]/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
+ * 
+ * <p>
+ * An optional param name prefix of "tokenizerFactory." may be used for any 
+ * init params that the SynonymFilterFactory needs to pass to the specified 
+ * TokenizerFactory.  If the TokenizerFactory expects an init parameter with 
+ * the same name as an init param used by the SynonymFilterFactory, the prefix 
+ * is mandatory.
+ * </p>
  */
 public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  private final boolean ignoreCase;
+  private final String tokenizerFactory;
+  private final String synonyms;
+  private final String format;
+  private final boolean expand;
+  private final Map<String, String> tokArgs = new HashMap<String, String>();
+
   private SynonymMap map;
-  private boolean ignoreCase;
+  
+  public SynonymFilterFactory(Map<String,String> args) {
+    super(args);
+    ignoreCase = getBoolean(args, "ignoreCase", false);
+    synonyms = require(args, "synonyms");
+    format = get(args, "format");
+    expand = getBoolean(args, "expand", true);
+
+    tokenizerFactory = get(args, "tokenizerFactory");
+    if (tokenizerFactory != null) {
+      assureMatchVersion();
+      tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
+      for (Iterator<String> itr = args.keySet().iterator(); itr.hasNext();) {
+        String key = itr.next();
+        tokArgs.put(key.replaceAll("^tokenizerFactory\\.",""), args.get(key));
+        itr.remove();
+      }
+    }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
   
   @Override
   public TokenStream create(TokenStream input) {
@@ -64,12 +103,7 @@ public class SynonymFilterFactory extend
 
   @Override
   public void inform(ResourceLoader loader) throws IOException {
-    final boolean ignoreCase = getBoolean("ignoreCase", false); 
-    this.ignoreCase = ignoreCase;
-
-    String tf = args.get("tokenizerFactory");
-
-    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
+    final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
     
     Analyzer analyzer = new Analyzer() {
       @Override
@@ -80,7 +114,6 @@ public class SynonymFilterFactory extend
       }
     };
 
-    String format = args.get("format");
     try {
       if (format == null || format.equals("solr")) {
         // TODO: expose dedup as a parameter?
@@ -99,12 +132,7 @@ public class SynonymFilterFactory extend
   /**
    * Load synonyms from the solr format, "format=solr".
    */
-  private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
-    final boolean expand = getBoolean("expand", true);
-    String synonyms = args.get("synonyms");
-    if (synonyms == null)
-      throw new IllegalArgumentException("Missing required argument 'synonyms'.");
-    
+  private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {    
     CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
       .onMalformedInput(CodingErrorAction.REPORT)
       .onUnmappableCharacter(CodingErrorAction.REPORT);
@@ -128,11 +156,6 @@ public class SynonymFilterFactory extend
    * Load synonyms from the wordnet format, "format=wordnet".
    */
   private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
-    final boolean expand = getBoolean("expand", true);
-    String synonyms = args.get("synonyms");
-    if (synonyms == null)
-      throw new IllegalArgumentException("Missing required argument 'synonyms'.");
-    
     CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
       .onMalformedInput(CodingErrorAction.REPORT)
       .onUnmappableCharacter(CodingErrorAction.REPORT);
@@ -154,12 +177,15 @@ public class SynonymFilterFactory extend
   
   // (there are no tests for this functionality)
   private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
-    TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
-    tokFactory.setLuceneMatchVersion(luceneMatchVersion);
-    tokFactory.init(args);
-    if (tokFactory instanceof ResourceLoaderAware) {
-      ((ResourceLoaderAware) tokFactory).inform(loader);
+    Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class);
+    try {
+      TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(tokArgs);
+      if (tokFactory instanceof ResourceLoaderAware) {
+        ((ResourceLoaderAware) tokFactory).inform(loader);
+      }
+      return tokFactory;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
     }
-    return tokFactory;
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java Thu May 30 07:53:18 2013
@@ -90,26 +90,21 @@ public class SynonymMap {
     public static CharsRef join(String[] words, CharsRef reuse) {
       int upto = 0;
       char[] buffer = reuse.chars;
-      for(String word : words) {
-        if (upto > 0) {
-          if (upto >= buffer.length) {
-            reuse.grow(upto);
-            buffer = reuse.chars;
-          }
-          buffer[upto++] = SynonymMap.WORD_SEPARATOR;
-        }
-
-        final int wordLen =  word.length();
-        final int needed = upto + wordLen;
+      for (String word : words) {
+        final int wordLen = word.length();
+        final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
         if (needed > buffer.length) {
           reuse.grow(needed);
           buffer = reuse.chars;
         }
+        if (upto > 0) {
+          buffer[upto++] = SynonymMap.WORD_SEPARATOR;
+        }
 
         word.getChars(0, wordLen, buffer, upto);
         upto += wordLen;
       }
-
+      reuse.length = upto;
       return reuse;
     }
     

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.th;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.th.ThaiWordFilter;
 
 import org.apache.lucene.analysis.TokenStream;
@@ -24,19 +26,27 @@ import org.apache.lucene.analysis.util.T
 
 /** 
  * Factory for {@link ThaiWordFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
  *     &lt;filter class="solr.ThaiWordFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
- *
  */
 public class ThaiWordFilterFactory extends TokenFilterFactory {
+  
+  /** Creates a new ThaiWordFilterFactory */
+  public ThaiWordFilterFactory(Map<String,String> args) {
+    super(args);
+    assureMatchVersion();
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public ThaiWordFilter create(TokenStream input) {
-    assureMatchVersion();
     return new ThaiWordFilter(luceneMatchVersion, input);
   }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java Thu May 30 07:53:18 2013
@@ -22,7 +22,7 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -93,7 +93,7 @@ public final class TurkishAnalyzer exten
 
   /**
    * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
-   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    * 
    * @param matchVersion lucene compatibility version
@@ -115,7 +115,7 @@ public final class TurkishAnalyzer exten
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link TurkishLowerCaseFilter},
-   *         {@link StopFilter}, {@link KeywordMarkerFilter} if a stem
+   *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem
    *         exclusion set is provided and {@link SnowballFilter}.
    */
   @Override
@@ -126,7 +126,7 @@ public final class TurkishAnalyzer exten
     result = new TurkishLowerCaseFilter(result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
-      result = new KeywordMarkerFilter(result, stemExclusionSet);
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new SnowballFilter(result, new TurkishStemmer());
     return new TokenStreamComponents(source, result);
   }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.tr;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.T
 
 /** 
  * Factory for {@link TurkishLowerCaseFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
  * &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
  *     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
- * &lt;/fieldType&gt;</pre> 
- *
+ * &lt;/fieldType&gt;</pre>
  */
 public class TurkishLowerCaseFilterFactory extends TokenFilterFactory  implements MultiTermAwareComponent {
+  
+  /** Creates a new TurkishLowerCaseFilterFactory */
+  public TurkishLowerCaseFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
   @Override
   public TokenStream create(TokenStream input) {
     return new TurkishLowerCaseFilter(input);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Thu May 30 07:53:18 2013
@@ -28,10 +28,14 @@ import java.io.Reader;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -41,37 +45,33 @@ import java.util.regex.PatternSyntaxExce
  * <p>
  * The typical lifecycle for a factory consumer is:
  * <ol>
- *   <li>Create factory via its a no-arg constructor
- *   <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
- *   <li>Calls {@link #init(Map)} passing arguments as key-value mappings.
+ *   <li>Create factory via its constructor (or via XXXFactory.forName)
  *   <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
  *   <li>Consumer calls create() to obtain instances.
  * </ol>
  */
 public abstract class AbstractAnalysisFactory {
+  public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
 
-  /** The original args, before init() processes them */
-  private Map<String,String> originalArgs;
-  
-  /** The init args */
-  protected Map<String,String> args;
+  /** The original args, before any processing */
+  private final Map<String,String> originalArgs;
 
   /** the luceneVersion arg */
-  protected Version luceneMatchVersion = null;
+  protected final Version luceneMatchVersion;
+  /** whether the luceneMatchVersion arg is explicitly specified in the serialized schema */
+  private boolean isExplicitLuceneMatchVersion = false;
 
   /**
    * Initialize this factory via a set of key-value pairs.
    */
-  public void init(Map<String,String> args) {
-    originalArgs = Collections.unmodifiableMap(args);
-    this.args = new HashMap<String,String>(args);
-  }
-
-  public Map<String,String> getArgs() {
-    return args;
+  protected AbstractAnalysisFactory(Map<String,String> args) {
+    originalArgs = Collections.unmodifiableMap(new HashMap<String,String>(args));
+    String version = get(args, LUCENE_MATCH_VERSION_PARAM);
+    luceneMatchVersion = version == null ? null : Version.parseLeniently(version);
+    args.remove(CLASS_NAME);  // consume the class arg
   }
   
-  public Map<String,String> getOriginalArgs() {
+  public final Map<String,String> getOriginalArgs() {
     return originalArgs;
   }
 
@@ -85,56 +85,139 @@ public abstract class AbstractAnalysisFa
     }
   }
 
-  public void setLuceneMatchVersion(Version luceneMatchVersion) {
-    this.luceneMatchVersion = luceneMatchVersion;
+  public final Version getLuceneMatchVersion() {
+    return this.luceneMatchVersion;
+  }
+  
+  public String require(Map<String,String> args, String name) {
+    String s = args.remove(name);
+    if (s == null) {
+      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
+    }
+    return s;
+  }
+  public String require(Map<String,String> args, String name, Collection<String> allowedValues) {
+    return require(args, name, allowedValues, true);
+  }
+  public String require(Map<String,String> args, String name, Collection<String> allowedValues, boolean caseSensitive) {
+    String s = args.remove(name);
+    if (s == null) {
+      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
+    } else {
+      for (String allowedValue : allowedValues) {
+        if (caseSensitive) {
+          if (s.equals(allowedValue)) {
+            return s;
+          }
+        } else {
+          if (s.equalsIgnoreCase(allowedValue)) {
+            return s;
+          }
+        }
+      }
+      throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
+    }
+  }
+  public String get(Map<String,String> args, String name) {
+    return args.remove(name); // defaultVal = null
+  }
+  public String get(Map<String,String> args, String name, String defaultVal) {
+    String s = args.remove(name);
+    return s == null ? defaultVal : s;
+  }
+  public String get(Map<String,String> args, String name, Collection<String> allowedValues) {
+    return get(args, name, allowedValues, null); // defaultVal = null
+  }
+  public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal) {
+    return get(args, name, allowedValues, defaultVal, true);
+  }
+  public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal, boolean caseSensitive) {
+    String s = args.remove(name);
+    if (s == null) {
+      return defaultVal;
+    } else {
+      for (String allowedValue : allowedValues) {
+        if (caseSensitive) {
+          if (s.equals(allowedValue)) {
+            return s;
+          }
+        } else {
+          if (s.equalsIgnoreCase(allowedValue)) {
+            return s;
+          }
+        }
+      }
+      throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
+    }
   }
 
-  public Version getLuceneMatchVersion() {
-    return this.luceneMatchVersion;
+  protected final int requireInt(Map<String,String> args, String name) {
+    return Integer.parseInt(require(args, name));
+  }
+  protected final int getInt(Map<String,String> args, String name, int defaultVal) {
+    String s = args.remove(name);
+    return s == null ? defaultVal : Integer.parseInt(s);
   }
 
-  protected int getInt(String name) {
-    return getInt(name, -1, false);
+  protected final boolean requireBoolean(Map<String,String> args, String name) {
+    return Boolean.parseBoolean(require(args, name));
+  }
+  protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
+    String s = args.remove(name);
+    return s == null ? defaultVal : Boolean.parseBoolean(s);
   }
 
-  protected int getInt(String name, int defaultVal) {
-    return getInt(name, defaultVal, true);
+  protected final float requireFloat(Map<String,String> args, String name) {
+    return Float.parseFloat(require(args, name));
+  }
+  protected final float getFloat(Map<String,String> args, String name, float defaultVal) {
+    String s = args.remove(name);
+    return s == null ? defaultVal : Float.parseFloat(s);
   }
 
-  protected int getInt(String name, int defaultVal, boolean useDefault) {
-    String s = args.get(name);
+  public char requireChar(Map<String,String> args, String name) {
+    return require(args, name).charAt(0);
+  }
+  public char getChar(Map<String,String> args, String name, char defaultValue) {
+    String s = args.remove(name);
     if (s == null) {
-      if (useDefault) {
-        return defaultVal;
+      return defaultValue;
+    } else { 
+      if (s.length() != 1) {
+        throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
+      } else {
+        return s.charAt(0);
       }
-      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
     }
-    return Integer.parseInt(s);
-  }
-
-  protected boolean getBoolean(String name, boolean defaultVal) {
-    return getBoolean(name, defaultVal, true);
   }
+  
+  private static final Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");
 
-  protected boolean getBoolean(String name, boolean defaultVal, boolean useDefault) {
-    String s = args.get(name);
-    if (s==null) {
-      if (useDefault) return defaultVal;
-      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
+  /** Returns whitespace- and/or comma-separated set of values, or null if none are found */
+  public Set<String> getSet(Map<String,String> args, String name) {
+    String s = args.remove(name);
+    if (s == null) {
+     return null;
+    } else {
+      Set<String> set = null;
+      Matcher matcher = ITEM_PATTERN.matcher(s);
+      if (matcher.find()) {
+        set = new HashSet<String>();
+        set.add(matcher.group(0));
+        while (matcher.find()) {
+          set.add(matcher.group(0));
+        }
+      }
+      return set;
     }
-    return Boolean.parseBoolean(s);
   }
 
   /**
    * Compiles a pattern for the value of the specified argument key <code>name</code> 
    */
-  protected Pattern getPattern(String name) {
+  protected final Pattern getPattern(Map<String,String> args, String name) {
     try {
-      String pat = args.get(name);
-      if (null == pat) {
-        throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
-      }
-      return Pattern.compile(args.get(name));
+      return Pattern.compile(require(args, name));
     } catch (PatternSyntaxException e) {
       throw new IllegalArgumentException
         ("Configuration Error: '" + name + "' can not be parsed in " +
@@ -146,7 +229,7 @@ public abstract class AbstractAnalysisFa
    * Returns as {@link CharArraySet} from wordFiles, which
    * can be a comma-separated list of filenames
    */
-  protected CharArraySet getWordSet(ResourceLoader loader,
+  protected final CharArraySet getWordSet(ResourceLoader loader,
       String wordFiles, boolean ignoreCase) throws IOException {
     assureMatchVersion();
     List<String> files = splitFileNames(wordFiles);
@@ -168,13 +251,13 @@ public abstract class AbstractAnalysisFa
   /**
    * Returns the resource's lines (with content treated as UTF-8)
    */
-  protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+  protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
     return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
   }
 
   /** same as {@link #getWordSet(ResourceLoader, String, boolean)},
    * except the input is in snowball format. */
-  protected CharArraySet getSnowballWordSet(ResourceLoader loader,
+  protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
       String wordFiles, boolean ignoreCase) throws IOException {
     assureMatchVersion();
     List<String> files = splitFileNames(wordFiles);
@@ -209,7 +292,7 @@ public abstract class AbstractAnalysisFa
    * @param fileNames the string containing file names
    * @return a list of file names with the escaping backslashed removed
    */
-  protected List<String> splitFileNames(String fileNames) {
+  protected final List<String> splitFileNames(String fileNames) {
     if (fileNames == null)
       return Collections.<String>emptyList();
 
@@ -220,4 +303,28 @@ public abstract class AbstractAnalysisFa
 
     return result;
   }
+
+  private static final String CLASS_NAME = "class";
+  
+  /**
+   * @return the string used to specify the concrete class name in a serialized representation: the class arg.  
+   *         If the concrete class name was not specified via a class arg, returns {@code getClass().getName()}.
+   */ 
+  public String getClassArg() {
+    if (null != originalArgs) {
+      String className = originalArgs.get(CLASS_NAME);
+      if (null != className) {
+        return className;
+      }
+    }
+    return getClass().getName();
+  }
+
+  public boolean isExplicitLuceneMatchVersion() {
+    return isExplicitLuceneMatchVersion;
+  }
+
+  public void setExplicitLuceneMatchVersion(boolean isExplicitLuceneMatchVersion) {
+    this.isExplicitLuceneMatchVersion = isExplicitLuceneMatchVersion;
+  }
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java Thu May 30 07:53:18 2013
@@ -52,6 +52,11 @@ final class AnalysisSPILoader<S extends 
   public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {
     this.clazz = clazz;
     this.suffixes = suffixes;
+    // if clazz's classloader is not a parent of the given one, we scan clazz's classloader, too:
+    final ClassLoader clazzClassloader = clazz.getClassLoader();
+    if (clazzClassloader != null && !SPIClassIterator.isParentClassLoader(clazzClassloader, classloader)) {
+      reload(clazzClassloader);
+    }
     reload(classloader);
   }
   
@@ -99,10 +104,10 @@ final class AnalysisSPILoader<S extends 
     this.services = Collections.unmodifiableMap(services);
   }
   
-  public S newInstance(String name) {
+  public S newInstance(String name, Map<String,String> args) {
     final Class<? extends S> service = lookupClass(name);
     try {
-      return service.newInstance();
+      return service.getConstructor(Map.class).newInstance(args);
     } catch (Exception e) {
       throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name '"+name+"' cannot be instantiated. " +
             "This is likely due to a misconfiguration of the java class '" + service.getName() + "': ", e);

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Thu May 30 07:53:18 2013
@@ -215,12 +215,9 @@ public class CharArrayMap<V> extends Abs
    * The user should never modify this text array after calling this method.
    */
   public V put(char[] text, V value) {
-    if (ignoreCase)
-      for(int i=0;i<text.length;){
-        i += Character.toChars(
-              Character.toLowerCase(
-                  charUtils.codePointAt(text, i)), text, i);
-      }
+    if (ignoreCase) {
+      charUtils.toLowerCase(text, 0, text.length);
+    }
     int slot = getSlot(text, 0, text.length);
     if (keys[slot] != null) {
       final V oldValue = values[slot];

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Thu May 30 07:53:18 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
  */
 
 import java.io.Reader;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CharFilter;
@@ -32,8 +33,8 @@ public abstract class CharFilterFactory 
       new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
   
   /** looks up a charfilter by name from context classpath */
-  public static CharFilterFactory forName(String name) {
-    return loader.newInstance(name);
+  public static CharFilterFactory forName(String name, Map<String,String> args) {
+    return loader.newInstance(name, args);
   }
   
   /** looks up a charfilter class by name from context classpath */
@@ -61,6 +62,13 @@ public abstract class CharFilterFactory 
     loader.reload(classloader);
   }
 
+  /**
+   * Initialize this factory via a set of key-value pairs.
+   */
+  protected CharFilterFactory(Map<String,String> args) {
+    super(args);
+  }
+
   /** Wraps the given Reader with a CharFilter. */
   public abstract Reader create(Reader input);
 }

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Thu May 30 07:53:18 2013
@@ -51,22 +51,6 @@ public abstract class CharTokenizer exte
    * 
    * @param matchVersion
    *          Lucene version to match
-   * @param source
-   *          the attribute source to use for this {@link Tokenizer}
-   * @param input
-   *          the input to split up into tokens
-   */
-  public CharTokenizer(Version matchVersion, AttributeSource source,
-      Reader input) {
-    super(source, input);
-    charUtils = CharacterUtils.getInstance(matchVersion);
-  }
-  
-  /**
-   * Creates a new {@link CharTokenizer} instance
-   * 
-   * @param matchVersion
-   *          Lucene version to match
    * @param factory
    *          the attribute factory to use for this {@link Tokenizer}
    * @param input

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java Thu May 30 07:53:18 2013
@@ -131,7 +131,25 @@ public abstract class CharacterUtils {
     }
     return new CharacterBuffer(new char[bufferSize], 0, 0);
   }
-
+  
+  
+  /**
+   * Lowercases, in place, every Unicode code point in {@code buffer[offset..limit)} via
+   * {@link Character#toLowerCase(int)}.
+   * @param buffer the char buffer to lowercase
+   * @param offset the index of the first char to lowercase (inclusive)
+   * @param limit the index at which lowercasing stops (exclusive); at most {@code buffer.length}
+   */
+  public void toLowerCase(final char[] buffer, final int offset, final int limit) {
+    assert buffer.length >= limit;
+    assert offset >= 0 && offset <= buffer.length; // was "offset <= 0", which rejected every positive offset
+    for (int i = offset; i < limit;) {
+      i += Character.toChars(
+              Character.toLowerCase(
+                  codePointAt(buffer, i)), buffer, i);
+     }
+  }
+  
   /**
    * Fills the {@link CharacterBuffer} with characters read from the given
    * reader {@link Reader}. This method tries to read as many characters into

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java Thu May 30 07:53:18 2013
@@ -67,14 +67,23 @@ public final class ClasspathResourceLoad
       throw new IOException("Resource not found: " + resource);
     return stream;
   }
+  
+  @Override
+  public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+    try {
+      return Class.forName(cname, true, loader).asSubclass(expectedType);
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot load class: " + cname, e);
+    }
+  }
 
   @Override
   public <T> T newInstance(String cname, Class<T> expectedType) {
+    Class<? extends T> clazz = findClass(cname, expectedType);
     try {
-      final Class<? extends T> clazz = Class.forName(cname, true, loader).asSubclass(expectedType);
       return clazz.newInstance();
     } catch (Exception e) {
-      throw new RuntimeException("Cannot instantiate class: " + cname, e);
+      throw new RuntimeException("Cannot create instance: " + cname, e);
     }
   }
 }