You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC
svn commit: r1487777 [7/50] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/mav...
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
* Portuguese words.
* <p>
* To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
*/
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PortugueseLightStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.PortugueseLightStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class PortugueseLightStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new PortugueseLightStemFilterFactory */
+ public PortugueseLightStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new PortugueseLightStemFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
* Portuguese words.
* <p>
* To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
*/
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PortugueseMinimalStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.PortugueseMinimalStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class PortugueseMinimalStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new PortugueseMinimalStemFilterFactory */
+ public PortugueseMinimalStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new PortugueseMinimalStemFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
* Portuguese words.
* <p>
* To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
*/
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PortugueseStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.PortugueseStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class PortugueseStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new PortugueseStemFilterFactory */
+ public PortugueseStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new PortugueseStemFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.rever
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link ReverseStringFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -31,13 +33,21 @@ import org.apache.lucene.analysis.util.T
* </analyzer>
* </fieldType></pre>
*
- *
* @since solr 1.4
*/
public class ReverseStringFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new ReverseStringFilterFactory */
+ public ReverseStringFilterFactory(Map<String,String> args) {
+ super(args);
+ assureMatchVersion();
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public ReverseStringFilter create(TokenStream in) {
- assureMatchVersion();
return new ReverseStringFilter(luceneMatchVersion,in);
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java Thu May 30 07:53:18 2013
@@ -23,7 +23,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -94,7 +94,7 @@ public final class RomanianAnalyzer exte
/**
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
- * provided this analyzer will add a {@link KeywordMarkerFilter} before
+ * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
* @param matchVersion lucene compatibility version
@@ -116,7 +116,7 @@ public final class RomanianAnalyzer exte
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
- * , {@link KeywordMarkerFilter} if a stem exclusion set is
+ * , {@link SetKeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
@@ -127,7 +127,7 @@ public final class RomanianAnalyzer exte
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
- result = new KeywordMarkerFilter(result, stemExclusionSet);
+ result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new RomanianStemmer());
return new TokenStreamComponents(source, result);
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Thu May 30 07:53:18 2013
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.util.S
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.IOUtils;
@@ -111,7 +111,7 @@ public final class RussianAnalyzer exten
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
- * , {@link KeywordMarkerFilter} if a stem exclusion set is
+ * , {@link SetKeywordMarkerFilter} if a stem exclusion set is
* provided, and {@link SnowballFilter}
*/
@Override
@@ -122,7 +122,7 @@ public final class RussianAnalyzer exten
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if (!stemExclusionSet.isEmpty())
- result = new KeywordMarkerFilter(result, stemExclusionSet);
+ result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
return new TokenStreamComponents(source, result);
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
* words.
* <p>
* To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
*/
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.ru;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link RussianLightStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.RussianLightStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class RussianLightStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new RussianLightStemFilterFactory */
+ public RussianLightStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new RussianLightStemFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.shing
* limitations under the License.
*/
-import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -25,7 +24,7 @@ import java.util.Map;
/**
* Factory for {@link ShingleFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -33,41 +32,37 @@ import java.util.Map;
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class ShingleFilterFactory extends TokenFilterFactory {
- private int minShingleSize;
- private int maxShingleSize;
- private boolean outputUnigrams;
- private boolean outputUnigramsIfNoShingles;
- private String tokenSeparator;
+ private final int minShingleSize;
+ private final int maxShingleSize;
+ private final boolean outputUnigrams;
+ private final boolean outputUnigramsIfNoShingles;
+ private final String tokenSeparator;
- @Override
- public void init(Map<String, String> args) {
- super.init(args);
- maxShingleSize = getInt("maxShingleSize",
- ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+ /** Creates a new ShingleFilterFactory */
+ public ShingleFilterFactory(Map<String, String> args) {
+ super(args);
+ maxShingleSize = getInt(args, "maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
if (maxShingleSize < 2) {
- throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize
- + ") - must be at least 2");
+ throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize + ") - must be at least 2");
}
- minShingleSize = getInt("minShingleSize",
- ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
+ minShingleSize = getInt(args, "minShingleSize", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
if (minShingleSize < 2) {
- throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
- + ") - must be at least 2");
+ throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be at least 2");
}
if (minShingleSize > maxShingleSize) {
- throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
- + ") - must be no greater than maxShingleSize ("
- + maxShingleSize + ")");
+ throw new IllegalArgumentException
+ ("Invalid minShingleSize (" + minShingleSize + ") - must be no greater than maxShingleSize (" + maxShingleSize + ")");
+ }
+ outputUnigrams = getBoolean(args, "outputUnigrams", true);
+ outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
+ tokenSeparator = get(args, "tokenSeparator", ShingleFilter.TOKEN_SEPARATOR);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
}
- outputUnigrams = getBoolean("outputUnigrams", true);
- outputUnigramsIfNoShingles = getBoolean("outputUnigramsIfNoShingles", false);
- tokenSeparator = args.containsKey("tokenSeparator")
- ? args.get("tokenSeparator")
- : ShingleFilter.TOKEN_SEPARATOR;
}
+
@Override
public ShingleFilter create(TokenStream input) {
ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Thu May 30 07:53:18 2013
@@ -37,6 +37,18 @@ import org.tartarus.snowball.SnowballPro
* <li>For other languages, see {@link LowerCaseFilter}.
* </ul>
* </p>
+ *
+ * <p>
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer
+ * {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ * </p>
+ *
+ *
*/
public final class SnowballFilter extends TokenFilter {
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java Thu May 30 07:53:18 2013
@@ -20,18 +20,20 @@ package org.apache.lucene.analysis.snowb
import java.util.Map;
import java.io.IOException;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.tartarus.snowball.SnowballProgram;
/**
* Factory for {@link SnowballFilter}, with configurable language
* <p>
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -39,26 +41,30 @@ import org.tartarus.snowball.SnowballPro
* <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
* </analyzer>
* </fieldType></pre>
- *
- *
*/
public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
- private String language = "English";
+ private final String language;
+ private final String wordFiles;
private Class<? extends SnowballProgram> stemClass;
private CharArraySet protectedWords = null;
+
+ /** Creates a new SnowballPorterFilterFactory */
+ public SnowballPorterFilterFactory(Map<String,String> args) {
+ super(args);
+ language = get(args, "language", "English");
+ wordFiles = get(args, PROTECTED_TOKENS);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public void inform(ResourceLoader loader) throws IOException {
- String cfgLanguage = args.get("language");
- if (cfgLanguage != null)
- language = cfgLanguage;
-
String className = "org.tartarus.snowball.ext." + language + "Stemmer";
stemClass = loader.newInstance(className, SnowballProgram.class).getClass();
- String wordFiles = args.get(PROTECTED_TOKENS);
if (wordFiles != null) {
protectedWords = getWordSet(loader, wordFiles, false);
}
@@ -74,7 +80,7 @@ public class SnowballPorterFilterFactory
}
if (protectedWords != null)
- input = new KeywordMarkerFilter(input, protectedWords);
+ input = new SetKeywordMarkerFilter(input, protectedWords);
return new SnowballFilter(input, program);
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.stand
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -24,17 +26,24 @@ import org.apache.lucene.analysis.standa
/**
* Factory for {@link ClassicFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.ClassicTokenizerFactory"/>
* <filter class="solr.ClassicFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
- *
*/
public class ClassicFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new ClassicFilterFactory */
+ public ClassicFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenFilter create(TokenStream input) {
return new ClassicFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Thu May 30 07:53:18 2013
@@ -104,14 +104,6 @@ public final class ClassicTokenizer exte
}
/**
- * Creates a new ClassicTokenizer with a given {@link AttributeSource}.
- */
- public ClassicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
- super(source, input);
- init(matchVersion);
- }
-
- /**
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
*/
public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,41 +17,37 @@ package org.apache.lucene.analysis.stand
* limitations under the License.
*/
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.ClassicTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link ClassicTokenizer}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/>
* </analyzer>
* </fieldType></pre>
- *
- *
*/
-
public class ClassicTokenizerFactory extends TokenizerFactory {
+ private final int maxTokenLength;
- private int maxTokenLength;
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
+ /** Creates a new ClassicTokenizerFactory */
+ public ClassicTokenizerFactory(Map<String,String> args) {
+ super(args);
assureMatchVersion();
- maxTokenLength = getInt("maxTokenLength",
- StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+ maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
- public Tokenizer create(Reader input) {
- ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, input);
+ public ClassicTokenizer create(AttributeFactory factory, Reader input) {
+ ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilterFactory.java Thu May 30 07:53:18 2013
@@ -25,20 +25,23 @@ import org.apache.lucene.analysis.util.T
/**
* Factory for {@link StandardFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
* <filter class="solr.StandardFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class StandardFilterFactory extends TokenFilterFactory {
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
+
+ /** Creates a new StandardFilterFactory */
+ public StandardFilterFactory(Map<String,String> args) {
+ super(args);
assureMatchVersion();
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu May 30 07:53:18 2013
@@ -118,14 +118,6 @@ public final class StandardTokenizer ext
}
/**
- * Creates a new StandardTokenizer with a given {@link AttributeSource}.
- */
- public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
- super(source, input);
- init(matchVersion);
- }
-
- /**
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
*/
public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,40 +17,37 @@ package org.apache.lucene.analysis.stand
* limitations under the License.
*/
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link StandardTokenizer}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/>
* </analyzer>
* </fieldType></pre>
- *
*/
-
public class StandardTokenizerFactory extends TokenizerFactory {
+ private final int maxTokenLength;
- private int maxTokenLength;
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
+ /** Creates a new StandardTokenizerFactory */
+ public StandardTokenizerFactory(Map<String,String> args) {
+ super(args);
assureMatchVersion();
- maxTokenLength = getInt("maxTokenLength",
- StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+ maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
- public StandardTokenizer create(Reader input) {
- StandardTokenizer tokenizer
- = new StandardTokenizer(luceneMatchVersion, input);
+ public StandardTokenizer create(AttributeFactory factory, Reader input) {
+ StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Thu May 30 07:53:18 2013
@@ -102,14 +102,6 @@ public final class UAX29URLEmailTokenize
}
/**
- * Creates a new UAX29URLEmailTokenizer with a given {@link AttributeSource}.
- */
- public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) {
- super(source, input);
- this.scanner = getScannerFor(matchVersion);
- }
-
- /**
* Creates a new UAX29URLEmailTokenizer with a given {@link AttributeFactory}
*/
public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java Thu May 30 07:53:18 2013
@@ -17,40 +17,37 @@ package org.apache.lucene.analysis.stand
* limitations under the License.
*/
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link UAX29URLEmailTokenizer}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
* </analyzer>
* </fieldType></pre>
- *
- *
*/
-
public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
+ private final int maxTokenLength;
- private int maxTokenLength;
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
+ /** Creates a new UAX29URLEmailTokenizerFactory */
+ public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
+ super(args);
assureMatchVersion();
- maxTokenLength = getInt("maxTokenLength",
- StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+ maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
- public UAX29URLEmailTokenizer create(Reader input) {
- UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input);
+ public UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input) {
+ UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input);
tokenizer.setMaxTokenLength(maxTokenLength);
return tokenizer;
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java Thu May 30 07:53:18 2013
@@ -23,7 +23,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -91,7 +91,7 @@ public final class SwedishAnalyzer exten
/**
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
- * provided this analyzer will add a {@link KeywordMarkerFilter} before
+ * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
* @param matchVersion lucene compatibility version
@@ -113,7 +113,7 @@ public final class SwedishAnalyzer exten
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
- * , {@link KeywordMarkerFilter} if a stem exclusion set is
+ * , {@link SetKeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@Override
@@ -124,7 +124,7 @@ public final class SwedishAnalyzer exten
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
- result = new KeywordMarkerFilter(result, stemExclusionSet);
+ result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new SwedishStemmer());
return new TokenStreamComponents(source, result);
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java Thu May 30 07:53:18 2013
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokena
* words.
* <p>
* To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
*/
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,13 +17,15 @@ package org.apache.lucene.analysis.sv;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link SwedishLightStemFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.SwedishLightStemFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class SwedishLightStemFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new SwedishLightStemFilterFactory */
+ public SwedishLightStemFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new SwedishLightStemFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java Thu May 30 07:53:18 2013
@@ -25,35 +25,74 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.ParseException;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.synonym.SynonymFilter;
-import org.apache.lucene.analysis.synonym.SynonymMap;
-import org.apache.lucene.analysis.synonym.SolrSynonymParser;
-import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
-import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.Version;
/**
* Factory for {@link SynonymFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
* format="solr" ignoreCase="false" expand="true"
- * tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
+ * tokenizerFactory="solr.WhitespaceTokenizerFactory"
+ * [optional tokenizer factory parameters]/>
* </analyzer>
* </fieldType></pre>
+ *
+ * <p>
+ * An optional param name prefix of "tokenizerFactory." may be used for any
+ * init params that the SynonymFilterFactory needs to pass to the specified
+ * TokenizerFactory. If the TokenizerFactory expects an init parameter with
+ * the same name as an init param used by the SynonymFilterFactory, the prefix
+ * is mandatory.
+ * </p>
*/
public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ private final boolean ignoreCase;
+ private final String tokenizerFactory;
+ private final String synonyms;
+ private final String format;
+ private final boolean expand;
+ private final Map<String, String> tokArgs = new HashMap<String, String>();
+
private SynonymMap map;
- private boolean ignoreCase;
+
+ public SynonymFilterFactory(Map<String,String> args) {
+ super(args);
+ ignoreCase = getBoolean(args, "ignoreCase", false);
+ synonyms = require(args, "synonyms");
+ format = get(args, "format");
+ expand = getBoolean(args, "expand", true);
+
+ tokenizerFactory = get(args, "tokenizerFactory");
+ if (tokenizerFactory != null) {
+ assureMatchVersion();
+ tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
+ for (Iterator<String> itr = args.keySet().iterator(); itr.hasNext();) {
+ String key = itr.next();
+ tokArgs.put(key.replaceAll("^tokenizerFactory\\.",""), args.get(key));
+ itr.remove();
+ }
+ }
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public TokenStream create(TokenStream input) {
@@ -64,12 +103,7 @@ public class SynonymFilterFactory extend
@Override
public void inform(ResourceLoader loader) throws IOException {
- final boolean ignoreCase = getBoolean("ignoreCase", false);
- this.ignoreCase = ignoreCase;
-
- String tf = args.get("tokenizerFactory");
-
- final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
+ final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
Analyzer analyzer = new Analyzer() {
@Override
@@ -80,7 +114,6 @@ public class SynonymFilterFactory extend
}
};
- String format = args.get("format");
try {
if (format == null || format.equals("solr")) {
// TODO: expose dedup as a parameter?
@@ -99,12 +132,7 @@ public class SynonymFilterFactory extend
/**
* Load synonyms from the solr format, "format=solr".
*/
- private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
- final boolean expand = getBoolean("expand", true);
- String synonyms = args.get("synonyms");
- if (synonyms == null)
- throw new IllegalArgumentException("Missing required argument 'synonyms'.");
-
+ private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
@@ -128,11 +156,6 @@ public class SynonymFilterFactory extend
* Load synonyms from the wordnet format, "format=wordnet".
*/
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
- final boolean expand = getBoolean("expand", true);
- String synonyms = args.get("synonyms");
- if (synonyms == null)
- throw new IllegalArgumentException("Missing required argument 'synonyms'.");
-
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
@@ -154,12 +177,15 @@ public class SynonymFilterFactory extend
// (there are no tests for this functionality)
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
- TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
- tokFactory.setLuceneMatchVersion(luceneMatchVersion);
- tokFactory.init(args);
- if (tokFactory instanceof ResourceLoaderAware) {
- ((ResourceLoaderAware) tokFactory).inform(loader);
+ Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class);
+ try {
+ TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(tokArgs);
+ if (tokFactory instanceof ResourceLoaderAware) {
+ ((ResourceLoaderAware) tokFactory).inform(loader);
+ }
+ return tokFactory;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
}
- return tokFactory;
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java Thu May 30 07:53:18 2013
@@ -90,26 +90,21 @@ public class SynonymMap {
public static CharsRef join(String[] words, CharsRef reuse) {
int upto = 0;
char[] buffer = reuse.chars;
- for(String word : words) {
- if (upto > 0) {
- if (upto >= buffer.length) {
- reuse.grow(upto);
- buffer = reuse.chars;
- }
- buffer[upto++] = SynonymMap.WORD_SEPARATOR;
- }
-
- final int wordLen = word.length();
- final int needed = upto + wordLen;
+ for (String word : words) {
+ final int wordLen = word.length();
+ final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
if (needed > buffer.length) {
reuse.grow(needed);
buffer = reuse.chars;
}
+ if (upto > 0) {
+ buffer[upto++] = SynonymMap.WORD_SEPARATOR;
+ }
word.getChars(0, wordLen, buffer, upto);
upto += wordLen;
}
-
+ reuse.length = upto;
return reuse;
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.th;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.th.ThaiWordFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -24,19 +26,27 @@ import org.apache.lucene.analysis.util.T
/**
* Factory for {@link ThaiWordFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
* <filter class="solr.ThaiWordFilterFactory"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class ThaiWordFilterFactory extends TokenFilterFactory {
+
+ /** Creates a new ThaiWordFilterFactory */
+ public ThaiWordFilterFactory(Map<String,String> args) {
+ super(args);
+ assureMatchVersion();
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public ThaiWordFilter create(TokenStream input) {
- assureMatchVersion();
return new ThaiWordFilter(luceneMatchVersion, input);
}
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java Thu May 30 07:53:18 2013
@@ -22,7 +22,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -93,7 +93,7 @@ public final class TurkishAnalyzer exten
/**
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
- * provided this analyzer will add a {@link KeywordMarkerFilter} before
+ * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
* stemming.
*
* @param matchVersion lucene compatibility version
@@ -115,7 +115,7 @@ public final class TurkishAnalyzer exten
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
- * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem
+ * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem
* exclusion set is provided and {@link SnowballFilter}.
*/
@Override
@@ -126,7 +126,7 @@ public final class TurkishAnalyzer exten
result = new TurkishLowerCaseFilter(result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
- result = new KeywordMarkerFilter(result, stemExclusionSet);
+ result = new SetKeywordMarkerFilter(result, stemExclusionSet);
result = new SnowballFilter(result, new TurkishStemmer());
return new TokenStreamComponents(source, result);
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java Thu May 30 07:53:18 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.tr;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.T
/**
* Factory for {@link TurkishLowerCaseFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
* <filter class="solr.TurkishLowerCaseFilterFactory"/>
* </analyzer>
- * </fieldType></pre>
- *
+ * </fieldType></pre>
*/
public class TurkishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+
+ /** Creates a new TurkishLowerCaseFilterFactory */
+ public TurkishLowerCaseFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Thu May 30 07:53:18 2013
@@ -28,10 +28,14 @@ import java.io.Reader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -41,37 +45,33 @@ import java.util.regex.PatternSyntaxExce
* <p>
* The typical lifecycle for a factory consumer is:
* <ol>
- * <li>Create factory via its a no-arg constructor
- * <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
- * <li>Calls {@link #init(Map)} passing arguments as key-value mappings.
+ * <li>Create factory via its constructor (or via XXXFactory.forName)
* <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
* <li>Consumer calls create() to obtain instances.
* </ol>
*/
public abstract class AbstractAnalysisFactory {
+ public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
- /** The original args, before init() processes them */
- private Map<String,String> originalArgs;
-
- /** The init args */
- protected Map<String,String> args;
+ /** The original args, before any processing */
+ private final Map<String,String> originalArgs;
/** the luceneVersion arg */
- protected Version luceneMatchVersion = null;
+ protected final Version luceneMatchVersion;
+ /** whether the luceneMatchVersion arg is explicitly specified in the serialized schema */
+ private boolean isExplicitLuceneMatchVersion = false;
/**
* Initialize this factory via a set of key-value pairs.
*/
- public void init(Map<String,String> args) {
- originalArgs = Collections.unmodifiableMap(args);
- this.args = new HashMap<String,String>(args);
- }
-
- public Map<String,String> getArgs() {
- return args;
+ protected AbstractAnalysisFactory(Map<String,String> args) {
+   // Keep an immutable snapshot of the arguments exactly as they were passed in,
+   // taken before any of them is consumed below.
+   originalArgs = Collections.unmodifiableMap(new HashMap<String,String>(args));
+   // Consume the version arg directly instead of going through the public,
+   // overridable get(Map,String): an override would otherwise run before the
+   // subclass has finished constructing.
+   String version = args.remove(LUCENE_MATCH_VERSION_PARAM);
+   luceneMatchVersion = version == null ? null : Version.parseLeniently(version);
+   args.remove(CLASS_NAME); // consume the class arg
}
- public Map<String,String> getOriginalArgs() {
+ public final Map<String,String> getOriginalArgs() {
return originalArgs;
}
@@ -85,56 +85,139 @@ public abstract class AbstractAnalysisFa
}
}
- public void setLuceneMatchVersion(Version luceneMatchVersion) {
- this.luceneMatchVersion = luceneMatchVersion;
+ public final Version getLuceneMatchVersion() {
+ return this.luceneMatchVersion;
+ }
+
+ public String require(Map<String,String> args, String name) {
+ String s = args.remove(name);
+ if (s == null) {
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
+ }
+ return s;
+ }
+ public String require(Map<String,String> args, String name, Collection<String> allowedValues) {
+ return require(args, name, allowedValues, true);
+ }
+ public String require(Map<String,String> args, String name, Collection<String> allowedValues, boolean caseSensitive) {
+ String s = args.remove(name);
+ if (s == null) {
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
+ } else {
+ for (String allowedValue : allowedValues) {
+ if (caseSensitive) {
+ if (s.equals(allowedValue)) {
+ return s;
+ }
+ } else {
+ if (s.equalsIgnoreCase(allowedValue)) {
+ return s;
+ }
+ }
+ }
+ throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
+ }
+ }
+ public String get(Map<String,String> args, String name) {
+ return args.remove(name); // defaultVal = null
+ }
+ public String get(Map<String,String> args, String name, String defaultVal) {
+ String s = args.remove(name);
+ return s == null ? defaultVal : s;
+ }
+ public String get(Map<String,String> args, String name, Collection<String> allowedValues) {
+ return get(args, name, allowedValues, null); // defaultVal = null
+ }
+ public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal) {
+ return get(args, name, allowedValues, defaultVal, true);
+ }
+ public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal, boolean caseSensitive) {
+ String s = args.remove(name);
+ if (s == null) {
+ return defaultVal;
+ } else {
+ for (String allowedValue : allowedValues) {
+ if (caseSensitive) {
+ if (s.equals(allowedValue)) {
+ return s;
+ }
+ } else {
+ if (s.equalsIgnoreCase(allowedValue)) {
+ return s;
+ }
+ }
+ }
+ throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
+ }
}
- public Version getLuceneMatchVersion() {
- return this.luceneMatchVersion;
+ protected final int requireInt(Map<String,String> args, String name) {
+ return Integer.parseInt(require(args, name));
+ }
+ protected final int getInt(Map<String,String> args, String name, int defaultVal) {
+ String s = args.remove(name);
+ return s == null ? defaultVal : Integer.parseInt(s);
}
- protected int getInt(String name) {
- return getInt(name, -1, false);
+ protected final boolean requireBoolean(Map<String,String> args, String name) {
+ return Boolean.parseBoolean(require(args, name));
+ }
+ protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
+ String s = args.remove(name);
+ return s == null ? defaultVal : Boolean.parseBoolean(s);
}
- protected int getInt(String name, int defaultVal) {
- return getInt(name, defaultVal, true);
+ protected final float requireFloat(Map<String,String> args, String name) {
+ return Float.parseFloat(require(args, name));
+ }
+ protected final float getFloat(Map<String,String> args, String name, float defaultVal) {
+ String s = args.remove(name);
+ return s == null ? defaultVal : Float.parseFloat(s);
}
- protected int getInt(String name, int defaultVal, boolean useDefault) {
- String s = args.get(name);
+ public char requireChar(Map<String,String> args, String name) {
+ return require(args, name).charAt(0);
+ }
+ public char getChar(Map<String,String> args, String name, char defaultValue) {
+ String s = args.remove(name);
if (s == null) {
- if (useDefault) {
- return defaultVal;
+ return defaultValue;
+ } else {
+ if (s.length() != 1) {
+ throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
+ } else {
+ return s.charAt(0);
}
- throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
- return Integer.parseInt(s);
- }
-
- protected boolean getBoolean(String name, boolean defaultVal) {
- return getBoolean(name, defaultVal, true);
}
+
+ private static final Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");
- protected boolean getBoolean(String name, boolean defaultVal, boolean useDefault) {
- String s = args.get(name);
- if (s==null) {
- if (useDefault) return defaultVal;
- throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
+ /** Returns whitespace- and/or comma-separated set of values, or null if none are found */
+ public Set<String> getSet(Map<String,String> args, String name) {
+ String s = args.remove(name);
+ if (s == null) {
+ return null;
+ } else {
+ Set<String> set = null;
+ Matcher matcher = ITEM_PATTERN.matcher(s);
+ if (matcher.find()) {
+ set = new HashSet<String>();
+ set.add(matcher.group(0));
+ while (matcher.find()) {
+ set.add(matcher.group(0));
+ }
+ }
+ return set;
}
- return Boolean.parseBoolean(s);
}
/**
* Compiles a pattern for the value of the specified argument key <code>name</code>
*/
- protected Pattern getPattern(String name) {
+ protected final Pattern getPattern(Map<String,String> args, String name) {
try {
- String pat = args.get(name);
- if (null == pat) {
- throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
- }
- return Pattern.compile(args.get(name));
+ return Pattern.compile(require(args, name));
} catch (PatternSyntaxException e) {
throw new IllegalArgumentException
("Configuration Error: '" + name + "' can not be parsed in " +
@@ -146,7 +229,7 @@ public abstract class AbstractAnalysisFa
* Returns as {@link CharArraySet} from wordFiles, which
* can be a comma-separated list of filenames
*/
- protected CharArraySet getWordSet(ResourceLoader loader,
+ protected final CharArraySet getWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
List<String> files = splitFileNames(wordFiles);
@@ -168,13 +251,13 @@ public abstract class AbstractAnalysisFa
/**
* Returns the resource's lines (with content treated as UTF-8)
*/
- protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+ protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
}
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
* except the input is in snowball format. */
- protected CharArraySet getSnowballWordSet(ResourceLoader loader,
+ protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
List<String> files = splitFileNames(wordFiles);
@@ -209,7 +292,7 @@ public abstract class AbstractAnalysisFa
* @param fileNames the string containing file names
* @return a list of file names with the escaping backslashes removed
*/
- protected List<String> splitFileNames(String fileNames) {
+ protected final List<String> splitFileNames(String fileNames) {
if (fileNames == null)
return Collections.<String>emptyList();
@@ -220,4 +303,28 @@ public abstract class AbstractAnalysisFa
return result;
}
+
+ private static final String CLASS_NAME = "class";
+
+ /**
+ * @return the string used to specify the concrete class name in a serialized representation: the class arg.
+ * If the concrete class name was not specified via a class arg, returns {@code getClass().getName()}.
+ */
+ public String getClassArg() {
+ if (null != originalArgs) {
+ String className = originalArgs.get(CLASS_NAME);
+ if (null != className) {
+ return className;
+ }
+ }
+ return getClass().getName();
+ }
+
+ public boolean isExplicitLuceneMatchVersion() {
+ return isExplicitLuceneMatchVersion;
+ }
+
+ public void setExplicitLuceneMatchVersion(boolean isExplicitLuceneMatchVersion) {
+ this.isExplicitLuceneMatchVersion = isExplicitLuceneMatchVersion;
+ }
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java Thu May 30 07:53:18 2013
@@ -52,6 +52,11 @@ final class AnalysisSPILoader<S extends
public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {
this.clazz = clazz;
this.suffixes = suffixes;
+ // if clazz' classloader is not a parent of the given one, we scan clazz's classloader, too:
+ final ClassLoader clazzClassloader = clazz.getClassLoader();
+ if (clazzClassloader != null && !SPIClassIterator.isParentClassLoader(clazzClassloader, classloader)) {
+ reload(clazzClassloader);
+ }
reload(classloader);
}
@@ -99,10 +104,10 @@ final class AnalysisSPILoader<S extends
this.services = Collections.unmodifiableMap(services);
}
- public S newInstance(String name) {
+ public S newInstance(String name, Map<String,String> args) {
final Class<? extends S> service = lookupClass(name);
try {
- return service.newInstance();
+ return service.getConstructor(Map.class).newInstance(args);
} catch (Exception e) {
throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name '"+name+"' cannot be instantiated. " +
"This is likely due to a misconfiguration of the java class '" + service.getName() + "': ", e);
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Thu May 30 07:53:18 2013
@@ -215,12 +215,9 @@ public class CharArrayMap<V> extends Abs
* The user should never modify this text array after calling this method.
*/
public V put(char[] text, V value) {
- if (ignoreCase)
- for(int i=0;i<text.length;){
- i += Character.toChars(
- Character.toLowerCase(
- charUtils.codePointAt(text, i)), text, i);
- }
+ if (ignoreCase) {
+ charUtils.toLowerCase(text, 0, text.length);
+ }
int slot = getSlot(text, 0, text.length);
if (keys[slot] != null) {
final V oldValue = values[slot];
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Thu May 30 07:53:18 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
*/
import java.io.Reader;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CharFilter;
@@ -32,8 +33,8 @@ public abstract class CharFilterFactory
new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
/** looks up a charfilter by name from context classpath */
- public static CharFilterFactory forName(String name) {
- return loader.newInstance(name);
+ public static CharFilterFactory forName(String name, Map<String,String> args) {
+ return loader.newInstance(name, args);
}
/** looks up a charfilter class by name from context classpath */
@@ -61,6 +62,13 @@ public abstract class CharFilterFactory
loader.reload(classloader);
}
+ /**
+ * Initialize this factory via a set of key-value pairs.
+ */
+ protected CharFilterFactory(Map<String,String> args) {
+ super(args);
+ }
+
/** Wraps the given Reader with a CharFilter. */
public abstract Reader create(Reader input);
}
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Thu May 30 07:53:18 2013
@@ -51,22 +51,6 @@ public abstract class CharTokenizer exte
*
* @param matchVersion
* Lucene version to match
- * @param source
- * the attribute source to use for this {@link Tokenizer}
- * @param input
- * the input to split up into tokens
- */
- public CharTokenizer(Version matchVersion, AttributeSource source,
- Reader input) {
- super(source, input);
- charUtils = CharacterUtils.getInstance(matchVersion);
- }
-
- /**
- * Creates a new {@link CharTokenizer} instance
- *
- * @param matchVersion
- * Lucene version to match
* @param factory
* the attribute factory to use for this {@link Tokenizer}
* @param input
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java Thu May 30 07:53:18 2013
@@ -131,7 +131,25 @@ public abstract class CharacterUtils {
}
return new CharacterBuffer(new char[bufferSize], 0, 0);
}
-
+
+
+ /**
+  * Converts each unicode codepoint to lowerCase via {@link Character#toLowerCase(int)} starting
+  * at the given offset. The buffer is modified in place.
+  * @param buffer the char buffer to lowercase
+  * @param offset the offset to start at
+  * @param limit the index in the buffer at which lowercasing stops (exclusive)
+  */
+ public void toLowerCase(final char[] buffer, final int offset, final int limit) {
+   assert buffer.length >= limit;
+   // The offset must lie inside the buffer; the previous check (offset <= 0)
+   // was inverted and tripped for every positive offset when assertions are on.
+   assert offset >= 0 && offset <= buffer.length;
+   for (int i = offset; i < limit;) {
+     // Character.toChars returns the number of chars written, which advances i
+     i += Character.toChars(
+         Character.toLowerCase(
+             codePointAt(buffer, i)), buffer, i);
+   }
+ }
+
/**
* Fills the {@link CharacterBuffer} with characters read from the given
* reader {@link Reader}. This method tries to read as many characters into
Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ClasspathResourceLoader.java Thu May 30 07:53:18 2013
@@ -67,14 +67,23 @@ public final class ClasspathResourceLoad
throw new IOException("Resource not found: " + resource);
return stream;
}
+
+ @Override
+ public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+ try {
+ return Class.forName(cname, true, loader).asSubclass(expectedType);
+ } catch (Exception e) {
+ throw new RuntimeException("Cannot load class: " + cname, e);
+ }
+ }
@Override
public <T> T newInstance(String cname, Class<T> expectedType) {
+ Class<? extends T> clazz = findClass(cname, expectedType);
try {
- final Class<? extends T> clazz = Class.forName(cname, true, loader).asSubclass(expectedType);
return clazz.newInstance();
} catch (Exception e) {
- throw new RuntimeException("Cannot instantiate class: " + cname, e);
+ throw new RuntimeException("Cannot create instance: " + cname, e);
}
}
}