You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/06 13:08:21 UTC
svn commit: r1465224 [8/9] - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/
lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/
lucene/analysis/common/src/java/or...
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java Sat Apr 6 11:08:17 2013
@@ -25,7 +25,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
@@ -75,21 +74,17 @@ import com.ibm.icu.text.RuleBasedBreakIt
* rulefiles="Latn:my.Latin.rules.rbbi,Cyrl:my.Cyrillic.rules.rbbi"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
static final String RULEFILES = "rulefiles";
- private Map<Integer,String> tailored;
+ private final Map<Integer,String> tailored;
private ICUTokenizerConfig config;
- /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
- public ICUTokenizerFactory() {}
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
+ /** Creates a new ICUTokenizerFactory */
+ public ICUTokenizerFactory(Map<String,String> args) {
+ super(args);
tailored = new HashMap<Integer,String>();
- String rulefilesArg = args.get(RULEFILES);
+ String rulefilesArg = args.remove(RULEFILES);
if (rulefilesArg != null) {
List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
for (String scriptAndResourcePath : scriptAndResourcePaths) {
@@ -99,6 +94,9 @@ public class ICUTokenizerFactory extends
tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), resourcePath);
}
}
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.collation;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
+import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.*;
@@ -71,18 +72,29 @@ import com.ibm.icu.util.ULocale;
@Deprecated
public class ICUCollationKeyFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent, ResourceLoaderAware {
private Collator collator;
+ private final String custom;
+ private final String localeID;
+ private final String strength;
+ private final String decomposition;
- public void inform(ResourceLoader loader) throws IOException {
- String custom = args.get("custom");
- String localeID = args.get("locale");
- String strength = args.get("strength");
- String decomposition = args.get("decomposition");
-
- String alternate = args.get("alternate");
- String caseLevel = args.get("caseLevel");
- String caseFirst = args.get("caseFirst");
- String numeric = args.get("numeric");
- String variableTop = args.get("variableTop");
+ private final String alternate;
+ private final String caseLevel;
+ private final String caseFirst;
+ private final String numeric;
+ private final String variableTop;
+
+ public ICUCollationKeyFilterFactory(Map<String,String> args) {
+ super(args);
+ custom = args.remove("custom");
+ localeID = args.remove("locale");
+ strength = args.remove("strength");
+ decomposition = args.remove("decomposition");
+
+ alternate = args.remove("alternate");
+ caseLevel = args.remove("caseLevel");
+ caseFirst = args.remove("caseFirst");
+ numeric = args.remove("numeric");
+ variableTop = args.remove("variableTop");
if (custom == null && localeID == null)
throw new IllegalArgumentException("Either custom or locale is required.");
@@ -92,6 +104,12 @@ public class ICUCollationKeyFilterFactor
+ "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. "
+ "Then save the entire customized ruleset to a file, and use with the custom parameter");
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ public void inform(ResourceLoader loader) throws IOException {
if (localeID != null) {
// create from a system collator, based on Locale.
collator = createFromLocale(localeID);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -19,11 +19,11 @@ package org.apache.lucene.analysis.icu;
import java.io.Reader;
import java.io.StringReader;
+import java.util.HashMap;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
/** basic tests for {@link ICUFoldingFilterFactory} */
public class TestICUFoldingFilterFactory extends BaseTokenStreamTestCase {
@@ -31,10 +31,21 @@ public class TestICUFoldingFilterFactory
/** basic tests to ensure the folding is working */
public void test() throws Exception {
Reader reader = new StringReader("Résumé");
- ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
- TokenStream stream = factory.create(tokenizer);
+ ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory(new HashMap<String,String>());
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] { "resume" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new ICUFoldingFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2FilterFactory.java Sat Apr 6 11:08:17 2013
@@ -19,13 +19,11 @@ package org.apache.lucene.analysis.icu;
import java.io.Reader;
import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/** basic tests for {@link ICUNormalizer2FilterFactory} */
public class TestICUNormalizer2FilterFactory extends BaseTokenStreamTestCase {
@@ -33,14 +31,23 @@ public class TestICUNormalizer2FilterFac
/** Test nfkc_cf defaults */
public void testDefaults() throws Exception {
Reader reader = new StringReader("This is a Ｔｅｓｔ");
- ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
- TokenStream stream = factory.create(tokenizer);
+ ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory(new HashMap<String,String>());
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
}
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new ICUNormalizer2FilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
+
// TODO: add tests for different forms
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -23,9 +23,8 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/** basic tests for {@link ICUTransformFilterFactory} */
public class TestICUTransformFilterFactory extends BaseTokenStreamTestCase {
@@ -33,33 +32,48 @@ public class TestICUTransformFilterFacto
/** ensure the transform is working */
public void test() throws Exception {
Reader reader = new StringReader("簡化字");
- ICUTransformFilterFactory factory = new ICUTransformFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("id", "Traditional-Simplified");
- factory.init(args);
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
- TokenStream stream = factory.create(tokenizer);
+ ICUTransformFilterFactory factory = new ICUTransformFilterFactory(args);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] { "简化字" });
}
/** test forward and reverse direction */
- public void testDirection() throws Exception {
+ public void testForwardDirection() throws Exception {
// forward
Reader reader = new StringReader("Российская Федерация");
- ICUTransformFilterFactory factory = new ICUTransformFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("id", "Cyrillic-Latin");
- factory.init(args);
- Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
- TokenStream stream = factory.create(tokenizer);
+ ICUTransformFilterFactory factory = new ICUTransformFilterFactory(args);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] { "Rossijskaâ", "Federaciâ" });
-
+ }
+
+ public void testReverseDirection() throws Exception {
// backward (invokes Latin-Cyrillic)
- reader = new StringReader("Rossijskaâ Federaciâ");
+ Reader reader = new StringReader("Rossijskaâ Federaciâ");
+ Map<String,String> args = new HashMap<String,String>();
+ args.put("id", "Cyrillic-Latin");
args.put("direction", "reverse");
- factory.init(args);
- tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
- stream = factory.create(tokenizer);
+ ICUTransformFilterFactory factory = new ICUTransformFilterFactory(args);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] { "Российская", "Федерация" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new ICUTransformFilterFactory(new HashMap<String,String>() {{
+ put("id", "Null");
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java Sat Apr 6 11:08:17 2013
@@ -30,8 +30,7 @@ import org.apache.lucene.analysis.util.C
public class TestICUTokenizerFactory extends BaseTokenStreamTestCase {
public void testMixedText() throws Exception {
Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี This is a test ກວ່າດອກ");
- ICUTokenizerFactory factory = new ICUTokenizerFactory();
- factory.init(new HashMap<String,String>());
+ ICUTokenizerFactory factory = new ICUTokenizerFactory(new HashMap<String,String>());
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream stream = factory.create(reader);
assertTokenStreamContents(stream,
@@ -43,10 +42,9 @@ public class TestICUTokenizerFactory ext
// “ U+201C LEFT DOUBLE QUOTATION MARK; ” U+201D RIGHT DOUBLE QUOTATION MARK
Reader reader = new StringReader
(" Don't,break.at?/(punct)! \u201Cnice\u201D\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$ ");
- ICUTokenizerFactory factory = new ICUTokenizerFactory();
final Map<String,String> args = new HashMap<String,String>();
args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
- factory.init(args);
+ ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
factory.inform(new ClasspathResourceLoader(this.getClass()));
TokenStream stream = factory.create(reader);
assertTokenStreamContents(stream,
@@ -57,10 +55,9 @@ public class TestICUTokenizerFactory ext
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
Reader reader = new StringReader
("One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish.");
- ICUTokenizerFactory factory = new ICUTokenizerFactory();
final Map<String,String> args = new HashMap<String,String>();
args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
- factory.init(args);
+ ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream stream = factory.create(reader);
assertTokenStreamContents(stream,
@@ -77,10 +74,9 @@ public class TestICUTokenizerFactory ext
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
Reader reader = new StringReader
("Some English. Немного русский. ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English.");
- ICUTokenizerFactory factory = new ICUTokenizerFactory();
final Map<String,String> args = new HashMap<String,String>();
args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
- factory.init(args);
+ ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream stream = factory.create(reader);
assertTokenStreamContents(stream, new String[] { "Some", "English",
@@ -88,4 +84,16 @@ public class TestICUTokenizerFactory ext
"ข้อความภาษาไทยเล็ก ๆ น้อย ๆ ",
"More", "English" });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new ICUTokenizerFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -21,15 +21,21 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
+import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.Version;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
@@ -44,15 +50,12 @@ public class TestICUCollationKeyFilterFa
* Instead of using LowerCaseFilter, use a turkish collator with primary strength.
* Then things will sort and match correctly.
*/
- public void testBasicUsage() throws IOException {
+ public void testBasicUsage() throws Exception {
String turkishUpperCase = "I WİLL USE TURKİSH CASING";
String turkishLowerCase = "ı will use turkish casıng";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "tr");
- args.put("strength", "primary");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "tr",
+ "strength", "primary");
TokenStream tsUpper = factory.create(
new KeywordTokenizer(new StringReader(turkishUpperCase)));
TokenStream tsLower = factory.create(
@@ -63,16 +66,13 @@ public class TestICUCollationKeyFilterFa
/*
* Test usage of the decomposition option for unicode normalization.
*/
- public void testNormalization() throws IOException {
+ public void testNormalization() throws Exception {
String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
String turkishLowerCase = "ı will use turkish casıng";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "tr");
- args.put("strength", "primary");
- args.put("decomposition", "canonical");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "tr",
+ "strength", "primary",
+ "decomposition", "canonical");
TokenStream tsUpper = factory.create(
new KeywordTokenizer(new StringReader(turkishUpperCase)));
TokenStream tsLower = factory.create(
@@ -83,16 +83,13 @@ public class TestICUCollationKeyFilterFa
/*
* Test secondary strength, for english case is not significant.
*/
- public void testSecondaryStrength() throws IOException {
+ public void testSecondaryStrength() throws Exception {
String upperCase = "TESTING";
String lowerCase = "testing";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "en");
- args.put("strength", "secondary");
- args.put("decomposition", "no");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "en",
+ "strength", "secondary",
+ "decomposition", "no");
TokenStream tsUpper = factory.create(
new KeywordTokenizer(new StringReader(upperCase)));
TokenStream tsLower = factory.create(
@@ -104,16 +101,13 @@ public class TestICUCollationKeyFilterFa
* Setting alternate=shifted to shift whitespace, punctuation and symbols
* to quaternary level
*/
- public void testIgnorePunctuation() throws IOException {
+ public void testIgnorePunctuation() throws Exception {
String withPunctuation = "foo-bar";
String withoutPunctuation = "foo bar";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "en");
- args.put("strength", "primary");
- args.put("alternate", "shifted");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "en",
+ "strength", "primary",
+ "alternate", "shifted");
TokenStream tsPunctuation = factory.create(
new KeywordTokenizer(new StringReader(withPunctuation)));
TokenStream tsWithoutPunctuation = factory.create(
@@ -125,18 +119,15 @@ public class TestICUCollationKeyFilterFa
* Setting alternate=shifted and variableTop to shift whitespace, but not
* punctuation or symbols, to quaternary level
*/
- public void testIgnoreWhitespace() throws IOException {
+ public void testIgnoreWhitespace() throws Exception {
String withSpace = "foo bar";
String withoutSpace = "foobar";
String withPunctuation = "foo-bar";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "en");
- args.put("strength", "primary");
- args.put("alternate", "shifted");
- args.put("variableTop", " ");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "en",
+ "strength", "primary",
+ "alternate", "shifted",
+ "variableTop", " ");
TokenStream tsWithSpace = factory.create(
new KeywordTokenizer(new StringReader(withSpace)));
TokenStream tsWithoutSpace = factory.create(
@@ -154,15 +145,12 @@ public class TestICUCollationKeyFilterFa
* Setting numeric to encode digits with numeric value, so that
* foobar-9 sorts before foobar-10
*/
- public void testNumerics() throws IOException {
+ public void testNumerics() throws Exception {
String nine = "foobar-9";
String ten = "foobar-10";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "en");
- args.put("numeric", "true");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "en",
+ "numeric", "true");
TokenStream tsNine = factory.create(
new KeywordTokenizer(new StringReader(nine)));
TokenStream tsTen = factory.create(
@@ -174,18 +162,15 @@ public class TestICUCollationKeyFilterFa
* Setting caseLevel=true to create an additional case level between
* secondary and tertiary
*/
- public void testIgnoreAccentsButNotCase() throws IOException {
+ public void testIgnoreAccentsButNotCase() throws Exception {
String withAccents = "résumé";
String withoutAccents = "resume";
String withAccentsUpperCase = "Résumé";
String withoutAccentsUpperCase = "Resume";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "en");
- args.put("strength", "primary");
- args.put("caseLevel", "true");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "en",
+ "strength", "primary",
+ "caseLevel", "true");
TokenStream tsWithAccents = factory.create(
new KeywordTokenizer(new StringReader(withAccents)));
TokenStream tsWithoutAccents = factory.create(
@@ -210,16 +195,13 @@ public class TestICUCollationKeyFilterFa
* Setting caseFirst=upper to cause uppercase strings to sort
* before lowercase ones.
*/
- public void testUpperCaseFirst() throws IOException {
+ public void testUpperCaseFirst() throws Exception {
String lower = "resume";
String upper = "Resume";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
- Map<String,String> args = new HashMap<String,String>();
- args.put("locale", "en");
- args.put("strength", "tertiary");
- args.put("caseFirst", "upper");
- factory.init(args);
- factory.inform(new StringMockResourceLoader(""));
+ TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
+ "locale", "en",
+ "strength", "tertiary",
+ "caseFirst", "upper");
TokenStream tsLower = factory.create(
new KeywordTokenizer(new StringReader(lower)));
TokenStream tsUpper = factory.create(
@@ -250,11 +232,10 @@ public class TestICUCollationKeyFilterFa
//
String germanUmlaut = "Töne";
String germanOE = "Toene";
- ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("custom", "rules.txt");
args.put("strength", "primary");
- factory.init(args);
+ ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory(args);
factory.inform(new StringMockResourceLoader(tailoredRules));
TokenStream tsUmlaut = factory.create(
new KeywordTokenizer(new StringReader(germanUmlaut)));
@@ -291,8 +272,42 @@ public class TestICUCollationKeyFilterFa
return null;
}
+ @Override
+ public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+ return null;
+ }
+
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes("UTF-8"));
}
}
+
+ private TokenFilterFactory tokenFilterFactory(String name, String... keysAndValues) throws Exception {
+ Class<? extends TokenFilterFactory> clazz = TokenFilterFactory.lookupClass(name);
+ if (keysAndValues.length % 2 == 1) {
+ throw new IllegalArgumentException("invalid keysAndValues map");
+ }
+ Map<String,String> args = new HashMap<String,String>();
+ for (int i = 0; i < keysAndValues.length; i += 2) {
+ String previous = args.put(keysAndValues[i], keysAndValues[i+1]);
+ assertNull("duplicate values for key: " + keysAndValues[i], previous);
+ }
+ String previous = args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
+ assertNull("duplicate values for key: luceneMatchVersion", previous);
+ TokenFilterFactory factory = null;
+ try {
+ factory = clazz.getConstructor(Map.class).newInstance(args);
+ } catch (InvocationTargetException e) {
+ // to simplify tests that check for illegal parameters
+ if (e.getCause() instanceof IllegalArgumentException) {
+ throw (IllegalArgumentException) e.getCause();
+ } else {
+ throw e;
+ }
+ }
+ if (factory instanceof ResourceLoaderAware) {
+ ((ResourceLoaderAware) factory).inform(new ClasspathResourceLoader(getClass()));
+ }
+ return factory;
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.ja;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.JapaneseBaseFormFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -34,6 +36,14 @@ import org.apache.lucene.analysis.util.T
*/
public class JapaneseBaseFormFilterFactory extends TokenFilterFactory {
+ /** Creates a new JapaneseBaseFormFilterFactory */
+ public JapaneseBaseFormFilterFactory(Map<String,String> args) {
+ super(args);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public TokenStream create(TokenStream input) {
return new JapaneseBaseFormFilter(input);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -39,12 +39,20 @@ import java.util.Map;
public class JapaneseIterationMarkCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
private static final String NORMALIZE_KANJI_PARAM = "normalizeKanji";
-
private static final String NORMALIZE_KANA_PARAM = "normalizeKana";
- private boolean normalizeKanji = true;
-
- private boolean normalizeKana = true;
+ private final boolean normalizeKanji;
+ private final boolean normalizeKana;
+
+ /** Creates a new JapaneseIterationMarkCharFilterFactory */
+ public JapaneseIterationMarkCharFilterFactory(Map<String,String> args) {
+ super(args);
+ normalizeKanji = getBoolean(args, NORMALIZE_KANJI_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT);
+ normalizeKana = getBoolean(args, NORMALIZE_KANA_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public CharFilter create(Reader input) {
@@ -52,13 +60,6 @@ public class JapaneseIterationMarkCharFi
}
@Override
- public void init(Map<String, String> args) {
- super.init(args);
- normalizeKanji = getBoolean(NORMALIZE_KANJI_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT);
- normalizeKana = getBoolean(NORMALIZE_KANA_PARAM, JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
- }
-
- @Override
public AbstractAnalysisFactory getMultiTermComponent() {
return this;
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -37,15 +37,18 @@ import java.util.Map;
*/
public class JapaneseKatakanaStemFilterFactory extends TokenFilterFactory {
private static final String MINIMUM_LENGTH_PARAM = "minimumLength";
- private int minimumLength;
+ private final int minimumLength;
- @Override
- public void init(Map<String, String> args) {
- super.init(args);
- minimumLength = getInt(MINIMUM_LENGTH_PARAM, JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH);
+ /** Creates a new JapaneseKatakanaStemFilterFactory */
+ public JapaneseKatakanaStemFilterFactory(Map<String,String> args) {
+ super(args);
+ minimumLength = getInt(args, MINIMUM_LENGTH_PARAM, JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH);
if (minimumLength < 2) {
throw new IllegalArgumentException("Illegal " + MINIMUM_LENGTH_PARAM + " " + minimumLength + " (must be 2 or greater)");
}
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ja;
import java.io.IOException;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
@@ -39,13 +40,22 @@ import org.apache.lucene.analysis.util.*
* </pre>
*/
public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
- private boolean enablePositionIncrements;
+ private final String stopTagFiles;
+ private final boolean enablePositionIncrements;
private Set<String> stopTags;
+ /** Creates a new JapanesePartOfSpeechStopFilterFactory */
+ public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) {
+ super(args);
+ stopTagFiles = args.remove("tags");
+ enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public void inform(ResourceLoader loader) throws IOException {
- String stopTagFiles = args.get("tags");
- enablePositionIncrements = getBoolean("enablePositionIncrements", false);
stopTags = null;
CharArraySet cas = getWordSet(loader, stopTagFiles, false);
if (cas != null) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -37,12 +37,15 @@ import java.util.Map;
*/
public class JapaneseReadingFormFilterFactory extends TokenFilterFactory {
private static final String ROMAJI_PARAM = "useRomaji";
- private boolean useRomaji;
+ private final boolean useRomaji;
- @Override
- public void init(Map<String, String> args) {
- super.init(args);
- useRomaji = getBoolean(ROMAJI_PARAM, false);
+ /** Creates a new JapaneseReadingFormFilterFactory */
+ public JapaneseReadingFormFilterFactory(Map<String,String> args) {
+ super(args);
+ useRomaji = getBoolean(args, ROMAJI_PARAM, false);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java Sat Apr 6 11:08:17 2013
@@ -62,17 +62,28 @@ public class JapaneseTokenizerFactory ex
private UserDictionary userDictionary;
- private Mode mode;
-
- private boolean discardPunctuation;
+ private final Mode mode;
+ private final boolean discardPunctuation;
+ private final String userDictionaryPath;
+ private final String userDictionaryEncoding;
+ /** Creates a new JapaneseTokenizerFactory */
+ public JapaneseTokenizerFactory(Map<String,String> args) {
+ super(args);
+ mode = getMode(args);
+ userDictionaryPath = args.remove(USER_DICT_PATH);
+ userDictionaryEncoding = args.remove(USER_DICT_ENCODING);
+ discardPunctuation = getBoolean(args, DISCARD_PUNCTUATION, true);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
@Override
public void inform(ResourceLoader loader) throws IOException {
- mode = getMode(args);
- String userDictionaryPath = args.get(USER_DICT_PATH);
if (userDictionaryPath != null) {
InputStream stream = loader.openResource(userDictionaryPath);
- String encoding = args.get(USER_DICT_ENCODING);
+ String encoding = userDictionaryEncoding;
if (encoding == null) {
encoding = IOUtils.UTF_8;
}
@@ -84,7 +95,6 @@ public class JapaneseTokenizerFactory ex
} else {
userDictionary = null;
}
- discardPunctuation = getBoolean(DISCARD_PUNCTUATION, true);
}
@Override
@@ -92,10 +102,10 @@ public class JapaneseTokenizerFactory ex
return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
}
- private Mode getMode(Map<String, String> args) {
- String mode = args.get(MODE);
- if (mode != null) {
- return Mode.valueOf(mode.toUpperCase(Locale.ROOT));
+ private Mode getMode(Map<String,String> args) {
+ String modeArg = args.remove(MODE);
+ if (modeArg != null) {
+ return Mode.valueOf(modeArg.toUpperCase(Locale.ROOT));
} else {
return JapaneseTokenizer.DEFAULT_MODE;
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java Sat Apr 6 11:08:17 2013
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.ja;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
import org.apache.lucene.analysis.util.ResourceLoader;
@@ -34,12 +32,21 @@ class StringMockResourceLoader implement
}
@Override
+ public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+ try {
+ return Class.forName(cname).asSubclass(expectedType);
+ } catch (Exception e) {
+ throw new RuntimeException("Cannot load class: " + cname, e);
+ }
+ }
+
+ @Override
public <T> T newInstance(String cname, Class<T> expectedType) {
+ Class<? extends T> clazz = findClass(cname, expectedType);
try {
- Class<? extends T> clazz = Class.forName(cname).asSubclass(expectedType);
return clazz.newInstance();
} catch (Exception e) {
- throw new RuntimeException(e);
+ throw new RuntimeException("Cannot create instance: " + cname, e);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -19,8 +19,7 @@ package org.apache.lucene.analysis.ja;
import java.io.IOException;
import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
@@ -30,16 +29,25 @@ import org.apache.lucene.analysis.TokenS
*/
public class TestJapaneseBaseFormFilterFactory extends BaseTokenStreamTestCase {
public void testBasics() throws IOException {
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- tokenizerFactory.init(args);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
TokenStream ts = tokenizerFactory.create(new StringReader("それはまだ実験段階にあります"));
- JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory();
+ JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory(new HashMap<String,String>());
ts = factory.create(ts);
assertTokenStreamContents(ts,
new String[] { "それ", "は", "まだ", "実験", "段階", "に", "ある", "ます" }
);
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new JapaneseBaseFormFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.TokenS
import java.io.IOException;
import java.io.StringReader;
-import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -35,22 +34,17 @@ public class TestJapaneseIterationMarkCh
public void testIterationMarksWithKeywordTokenizer() throws IOException {
final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
- JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
+ JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
CharFilter filter = filterFactory.create(new StringReader(text));
TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
}
public void testIterationMarksWithJapaneseTokenizer() throws IOException {
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- Map<String, String> tokenizerArgs = Collections.emptyMap();
- tokenizerFactory.init(tokenizerArgs);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
- Map<String, String> filterArgs = Collections.emptyMap();
- filterFactory.init(filterArgs);
-
+ JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
CharFilter filter = filterFactory.create(
new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
);
@@ -59,16 +53,13 @@ public class TestJapaneseIterationMarkCh
}
public void testKanjiOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- Map<String, String> tokenizerArgs = Collections.emptyMap();
- tokenizerFactory.init(tokenizerArgs);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
Map<String, String> filterArgs = new HashMap<String, String>();
filterArgs.put("normalizeKanji", "true");
filterArgs.put("normalizeKana", "false");
- filterFactory.init(filterArgs);
+ JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);
CharFilter filter = filterFactory.create(
new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
@@ -78,16 +69,13 @@ public class TestJapaneseIterationMarkCh
}
public void testKanaOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- Map<String, String> tokenizerArgs = Collections.emptyMap();
- tokenizerFactory.init(tokenizerArgs);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
- JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
Map<String, String> filterArgs = new HashMap<String, String>();
filterArgs.put("normalizeKanji", "false");
filterArgs.put("normalizeKana", "true");
- filterFactory.init(filterArgs);
+ JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);
CharFilter filter = filterFactory.create(
new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
@@ -95,4 +83,16 @@ public class TestJapaneseIterationMarkCh
TokenStream tokenStream = tokenizerFactory.create(filter);
assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -22,27 +22,34 @@ import org.apache.lucene.analysis.TokenS
import java.io.IOException;
import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
/**
* Simple tests for {@link JapaneseKatakanaStemFilterFactory}
*/
public class TestJapaneseKatakanaStemFilterFactory extends BaseTokenStreamTestCase {
public void testKatakanaStemming() throws IOException {
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- Map<String, String> tokenizerArgs = Collections.emptyMap();
- tokenizerFactory.init(tokenizerArgs);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
TokenStream tokenStream = tokenizerFactory.create(
new StringReader("明後日パーティーに行く予定がある。図書館で資料をコピーしました。")
);
- JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory();
- Map<String, String> filterArgs = Collections.emptyMap();
- filterFactory.init(filterArgs);
+ JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory(new HashMap<String,String>());;
assertTokenStreamContents(filterFactory.create(tokenStream),
new String[]{ "明後日", "パーティ", "に", "行く", "予定", "が", "ある", // パーティー should be stemmed
"図書館", "で", "資料", "を", "コピー", "し", "まし", "た"} // コピー should not be stemmed
);
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new JapaneseKatakanaStemFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ja;
import java.io.IOException;
import java.io.StringReader;
-import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -35,21 +34,30 @@ public class TestJapanesePartOfSpeechSto
"# verb-main:\n" +
"動詞-自立\n";
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- tokenizerFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> tokenizerArgs = Collections.emptyMap();
- tokenizerFactory.init(tokenizerArgs);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
TokenStream ts = tokenizerFactory.create(new StringReader("私は制限スピードを超える。"));
- JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory();
Map<String,String> args = new HashMap<String,String>();
+ args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
args.put("tags", "stoptags.txt");
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(args);
+ JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory(args);
factory.inform(new StringMockResourceLoader(tags));
ts = factory.create(ts);
assertTokenStreamContents(ts,
new String[] { "私", "は", "制限", "スピード", "を" }
);
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new JapanesePartOfSpeechStopFilterFactory(new HashMap<String,String>() {{
+ put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -22,22 +22,31 @@ import org.apache.lucene.analysis.TokenS
import java.io.IOException;
import java.io.StringReader;
-import java.util.Collections;
-import java.util.Map;
+import java.util.HashMap;
/**
* Simple tests for {@link JapaneseReadingFormFilterFactory}
*/
public class TestJapaneseReadingFormFilterFactory extends BaseTokenStreamTestCase {
public void testReadings() throws IOException {
- JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
- Map<String, String> args = Collections.emptyMap();
- tokenizerFactory.init(args);
+ JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
tokenizerFactory.inform(new StringMockResourceLoader(""));
TokenStream tokenStream = tokenizerFactory.create(new StringReader("先ほどベルリンから来ました。"));
- JapaneseReadingFormFilterFactory filterFactory = new JapaneseReadingFormFilterFactory();
+ JapaneseReadingFormFilterFactory filterFactory = new JapaneseReadingFormFilterFactory(new HashMap<String,String>());
assertTokenStreamContents(filterFactory.create(tokenStream),
new String[] { "サキ", "ホド", "ベルリン", "カラ", "キ", "マシ", "タ" }
);
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new JapaneseReadingFormFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java Sat Apr 6 11:08:17 2013
@@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenS
*/
public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase {
public void testSimple() throws IOException {
- JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
+ JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String,String>());
factory.inform(new StringMockResourceLoader(""));
TokenStream ts = factory.create(new StringReader("これは本ではない"));
assertTokenStreamContents(ts,
@@ -48,10 +45,7 @@ public class TestJapaneseTokenizerFactor
* Test that search mode is enabled and working by default
*/
public void testDefaults() throws IOException {
- JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
+ JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap<String,String>());
factory.inform(new StringMockResourceLoader(""));
TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
assertTokenStreamContents(ts,
@@ -63,10 +57,9 @@ public class TestJapaneseTokenizerFactor
* Test mode parameter: specifying normal mode
*/
public void testMode() throws IOException {
- JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("mode", "normal");
- factory.init(args);
+ JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
factory.inform(new StringMockResourceLoader(""));
TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
assertTokenStreamContents(ts,
@@ -84,10 +77,9 @@ public class TestJapaneseTokenizerFactor
"関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,テスト名詞\n" +
"# Custom reading for sumo wrestler\n" +
"朝青龍,朝青龍,アサショウリュウ,カスタム人名\n";
- JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("userDictionary", "userdict.txt");
- factory.init(args);
+ JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
factory.inform(new StringMockResourceLoader(userDict));
TokenStream ts = factory.create(new StringReader("関西国際空港に行った"));
assertTokenStreamContents(ts,
@@ -99,15 +91,13 @@ public class TestJapaneseTokenizerFactor
* Test preserving punctuation
*/
public void testPreservePunctuation() throws IOException {
- JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("discardPunctuation", "false");
- factory.init(args);
+ JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args);
factory.inform(new StringMockResourceLoader(""));
TokenStream ts = factory.create(
new StringReader("今ノルウェーにいますが、来週の頭日本に戻ります。楽しみにしています!お寿司が食べたいな。。。")
);
- System.out.println(ts.toString());
assertTokenStreamContents(ts,
new String[] { "今", "ノルウェー", "に", "い", "ます", "が", "、",
"来週", "の", "頭", "日本", "に", "戻り", "ます", "。",
@@ -115,4 +105,16 @@ public class TestJapaneseTokenizerFactor
"お", "寿司", "が", "食べ", "たい", "な", "。", "。", "。"}
);
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new JapaneseTokenizerFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -25,7 +25,6 @@ import morfologik.stemming.PolishStemmer
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.morfologik.MorfologikFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
@@ -52,24 +51,10 @@ public class MorfologikFilterFactory ext
/** Schema attribute. */
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
- /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
- public MorfologikFilterFactory() {}
-
- /**
- * {@inheritDoc}
- */
- @Override
- public TokenStream create(TokenStream ts) {
- return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
- }
-
- /**
- * {@inheritDoc}
- */
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
- String dictionaryName = args.get(DICTIONARY_SCHEMA_ATTRIBUTE);
+ /** Creates a new MorfologikFilterFactory */
+ public MorfologikFilterFactory(Map<String,String> args) {
+ super(args);
+ String dictionaryName = args.remove(DICTIONARY_SCHEMA_ATTRIBUTE);
if (dictionaryName != null && !dictionaryName.isEmpty()) {
try {
DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
@@ -81,5 +66,13 @@ public class MorfologikFilterFactory ext
+ dictionaryName);
}
}
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ @Override
+ public TokenStream create(TokenStream ts) {
+ return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -22,8 +22,8 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test for {@link MorfologikFilterFactory}.
@@ -32,13 +32,23 @@ public class TestMorfologikFilterFactory
public void testCreateDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
Map<String,String> initParams = new HashMap<String,String>();
- initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
- "morfologik");
- MorfologikFilterFactory factory = new MorfologikFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- factory.init(initParams);
- TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- reader));
- assertTokenStreamContents(ts, new String[] {"rower", "bilet"});
+ initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE, "morfologik");
+ initParams.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
+ MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
+ assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
+ }
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new MorfologikFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -27,12 +27,11 @@ import org.apache.commons.codec.language
import org.apache.commons.codec.language.bm.RuleType;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link BeiderMorseFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_bm" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -42,37 +41,35 @@ import org.apache.lucene.analysis.util.T
* </filter>
* </analyzer>
* </fieldType></pre>
- *
*/
public class BeiderMorseFilterFactory extends TokenFilterFactory {
- private PhoneticEngine engine;
- private LanguageSet languageSet;
+ private final PhoneticEngine engine;
+ private final LanguageSet languageSet;
- /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
- public BeiderMorseFilterFactory() {}
-
- @Override
- public void init(Map<String,String> args) {
- super.init(args);
-
+ /** Creates a new BeiderMorseFilterFactory */
+ public BeiderMorseFilterFactory(Map<String,String> args) {
+ super(args);
// PhoneticEngine = NameType + RuleType + concat
// we use common-codec's defaults: GENERIC + APPROX + true
- String nameTypeArg = args.get("nameType");
+ String nameTypeArg = args.remove("nameType");
NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg);
- String ruleTypeArg = args.get("ruleType");
+ String ruleTypeArg = args.remove("ruleType");
RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);
- boolean concat = getBoolean("concat", true);
+ boolean concat = getBoolean(args, "concat", true);
engine = new PhoneticEngine(nameType, ruleType, concat);
// LanguageSet: defaults to automagic, otherwise a comma-separated list.
- String languageSetArg = args.get("languageSet");
+ String languageSetArg = args.remove("languageSet");
if (languageSetArg == null || languageSetArg.equals("auto")) {
languageSet = null;
} else {
languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(","))));
}
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -21,19 +21,17 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link DoubleMetaphoneFilter}.
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* <filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/>
* </analyzer>
* </fieldType></pre>
- *
*/
public class DoubleMetaphoneFilterFactory extends TokenFilterFactory
{
@@ -44,20 +42,16 @@ public class DoubleMetaphoneFilterFactor
/** default maxCodeLength if not specified */
public static final int DEFAULT_MAX_CODE_LENGTH = 4;
- private boolean inject = true;
- private int maxCodeLength = DEFAULT_MAX_CODE_LENGTH;
+ private final boolean inject;
+ private final int maxCodeLength;
- /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
- public DoubleMetaphoneFilterFactory() {}
-
- @Override
- public void init(Map<String, String> args) {
- super.init(args);
-
- inject = getBoolean(INJECT, true);
-
- if (args.get(MAX_CODE_LENGTH) != null) {
- maxCodeLength = Integer.parseInt(args.get(MAX_CODE_LENGTH));
+ /** Creates a new DoubleMetaphoneFilterFactory */
+ public DoubleMetaphoneFilterFactory(Map<String,String> args) {
+ super(args);
+ inject = getBoolean(args, INJECT, true);
+ maxCodeLength = getInt(args, MAX_CODE_LENGTH, DEFAULT_MAX_CODE_LENGTH);
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -27,7 +27,6 @@ import java.util.Map;
import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.*;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -49,7 +48,7 @@ import org.apache.lucene.analysis.util.T
* support this then specifying this is an error.</dd>
* </dl>
*
- * <pre class="prettyprint" >
+ * <pre class="prettyprint">
* <fieldType name="text_phonetic" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -59,9 +58,7 @@ import org.apache.lucene.analysis.util.T
*
* @see PhoneticFilter
*/
-public class PhoneticFilterFactory extends TokenFilterFactory
- implements ResourceLoaderAware
-{
+public class PhoneticFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
/** parameter name: either a short name or a full class name */
public static final String ENCODER = "encoder";
/** parameter name: true if encoded tokens should be added as synonyms */
@@ -82,33 +79,40 @@ public class PhoneticFilterFactory exten
registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class);
}
- boolean inject = true; //accessed by the test
- private String name = null;
+ final boolean inject; //accessed by the test
+ private final String name;
+ private final Integer maxCodeLength;
private Class<? extends Encoder> clazz = null;
private Method setMaxCodeLenMethod = null;
- private Integer maxCodeLength = null;
- /** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
- public PhoneticFilterFactory() {}
+ /** Creates a new PhoneticFilterFactory */
+ public PhoneticFilterFactory(Map<String,String> args) {
+ super(args);
+ inject = getBoolean(args, INJECT, true);
+ name = args.remove(ENCODER);
+ if (name == null) {
+ throw new IllegalArgumentException("Missing required parameter: " + ENCODER
+ + " [" + registry.keySet() + "]");
+ }
+ String v = args.remove(MAX_CODE_LENGTH);
+ if (v != null) {
+ maxCodeLength = Integer.valueOf(v);
+ } else {
+ maxCodeLength = null;
+ }
+ if (!args.isEmpty()) {
+ throw new IllegalArgumentException("Unknown parameters: " + args);
+ }
+ }
@Override
public void inform(ResourceLoader loader) throws IOException {
-
- inject = getBoolean(INJECT, true);
-
- String name = args.get( ENCODER );
- if( name == null ) {
- throw new IllegalArgumentException("Missing required parameter: " + ENCODER
- + " [" + registry.keySet() + "]");
- }
clazz = registry.get(name.toUpperCase(Locale.ROOT));
if( clazz == null ) {
clazz = resolveEncoder(name, loader);
}
- String v = args.get(MAX_CODE_LENGTH);
- if (v != null) {
- maxCodeLength = Integer.valueOf(v);
+ if (maxCodeLength != null) {
try {
setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
} catch (Exception e) {
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.phone
*/
import java.io.StringReader;
-import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -29,10 +28,7 @@ import org.apache.lucene.analysis.TokenS
/** Simple tests for {@link BeiderMorseFilterFactory} */
public class TestBeiderMorseFilterFactory extends BaseTokenStreamTestCase {
public void testBasics() throws Exception {
- BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
- factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
- Map<String, String> args = Collections.emptyMap();
- factory.init(args);
+ BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(new HashMap<String,String>());
TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(ts,
new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
@@ -42,10 +38,9 @@ public class TestBeiderMorseFilterFactor
}
public void testLanguageSet() throws Exception {
- BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("languageSet", "polish");
- factory.init(args);
+ BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(ts,
new String[] { "vDmbYrk", "vDmbirk", "vambYrk", "vambirk", "vimbYrk", "vimbirk" },
@@ -55,11 +50,10 @@ public class TestBeiderMorseFilterFactor
}
public void testOptions() throws Exception {
- BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("nameType", "ASHKENAZI");
args.put("ruleType", "EXACT");
- factory.init(args);
+ BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(ts,
new String[] { "vajnberk" },
@@ -67,4 +61,16 @@ public class TestBeiderMorseFilterFactor
new int[] { 8 },
new int[] { 1 });
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new BeiderMorseFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java?rev=1465224&r1=1465223&r2=1465224&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDoubleMetaphoneFilterFactory.java Sat Apr 6 11:08:17 2013
@@ -30,8 +30,7 @@ import org.apache.lucene.analysis.tokena
public class TestDoubleMetaphoneFilterFactory extends BaseTokenStreamTestCase {
public void testDefaults() throws Exception {
- DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
- factory.init(new HashMap<String, String>());
+ DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String, String>());
TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
TokenStream filteredStream = factory.create(inputStream);
@@ -40,11 +39,10 @@ public class TestDoubleMetaphoneFilterFa
}
public void testSettingSizeAndInject() throws Exception {
- DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
- Map<String, String> parameters = new HashMap<String, String>();
+ Map<String,String> parameters = new HashMap<String,String>();
parameters.put("inject", "false");
parameters.put("maxCodeLength", "8");
- factory.init(parameters);
+ DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(parameters);
TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
@@ -57,8 +55,7 @@ public class TestDoubleMetaphoneFilterFa
* Ensure that reset() removes any state (buffered tokens)
*/
public void testReset() throws Exception {
- DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory();
- factory.init(new HashMap<String, String>());
+ DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new HashMap<String, String>());
TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
TokenStream filteredStream = factory.create(inputStream);
@@ -74,4 +71,16 @@ public class TestDoubleMetaphoneFilterFa
// ensure there are no more tokens, such as ANTRNXNL
assertFalse(filteredStream.incrementToken());
}
+
+ /** Test that bogus arguments result in exception */
+ public void testBogusArguments() throws Exception {
+ try {
+ new DoubleMetaphoneFilterFactory(new HashMap<String,String>() {{
+ put("bogusArg", "bogusValue");
+ }});
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("Unknown parameters"));
+ }
+ }
}