You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2015/10/08 11:21:36 UTC
svn commit: r1707458 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/analysis/morfologik/
lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/
lucene/analysis/morfologik/src/test-files/
lucene/analysis/morfologik/src/test/o...
Author: dweiss
Date: Thu Oct 8 09:21:35 2015
New Revision: 1707458
URL: http://svn.apache.org/viewvc?rev=1707458&view=rev
Log:
LUCENE-6833: Upgraded Morfologik to version 2.0.1. The 'dictionary' attribute has been
reverted back and now points at the dictionary resource to be loaded instead of the default Polish dictionary.
Added:
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test-files/
- copied from r1707457, lucene/dev/trunk/lucene/analysis/morfologik/src/test-files/
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-2.0.1.jar.sha1
- copied unchanged from r1707457, lucene/dev/trunk/lucene/licenses/morfologik-fsa-2.0.1.jar.sha1
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-polish-2.0.1.jar.sha1
- copied unchanged from r1707457, lucene/dev/trunk/lucene/licenses/morfologik-polish-2.0.1.jar.sha1
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-2.0.1.jar.sha1
- copied unchanged from r1707457, lucene/dev/trunk/lucene/licenses/morfologik-stemming-2.0.1.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/morfologik-fsa-2.0.1.jar.sha1
- copied unchanged from r1707457, lucene/dev/trunk/solr/licenses/morfologik-fsa-2.0.1.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/morfologik-polish-2.0.1.jar.sha1
- copied unchanged from r1707457, lucene/dev/trunk/solr/licenses/morfologik-polish-2.0.1.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/morfologik-stemming-2.0.1.jar.sha1
- copied unchanged from r1707457, lucene/dev/trunk/solr/licenses/morfologik-stemming-2.0.1.jar.sha1
Removed:
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-1.10.0.jar.sha1
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-polish-1.10.0.jar.sha1
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-1.10.0.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/morfologik-fsa-1.10.0.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/morfologik-polish-1.10.0.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/morfologik-stemming-1.10.0.jar.sha1
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
lucene/dev/branches/branch_5x/lucene/ivy-versions.properties (contents, props changed)
lucene/dev/branches/branch_5x/lucene/licenses/ (props changed)
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/licenses/ (props changed)
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu Oct 8 09:21:35 2015
@@ -152,8 +152,12 @@ Other
* LUCENE-6761: MatchAllDocsQuery's Scorers do not expose approximations
anymore. (Adrien Grand)
-* LUCENE-6775: Improved MorfologikFilterFactory to allow loading of
- custom dictionaries from ResourceLoader. (Uwe Schindler)
+* LUCENE-6775, LUCENE-6833: Improved MorfologikFilterFactory to allow
+ loading of custom dictionaries from ResourceLoader. Upgraded
+ Morfologik to version 2.0.1. The 'dictionary' attribute has been
+ reverted back and now points at the dictionary resource to be
+ loaded instead of the default Polish dictionary.
+ (Uwe Schindler, Dawid Weiss)
* LUCENE-6797: Make GeoCircle an interface and use a factory to create
it, to eventually handle degenerate cases (Karl Wright via Mike
Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml Thu Oct 8 09:21:35 2015
@@ -18,9 +18,8 @@
-->
<project name="analyzers-morfologik" default="default">
-
<description>
- Analyzer for indexing Polish
+ Analyzer for dictionary stemming, built-in Polish dictionary
</description>
<import file="../analysis-module-build.xml"/>
@@ -30,6 +29,12 @@
<fileset dir="lib"/>
<path refid="base.classpath"/>
</path>
+
+
+ <path id="test.classpath">
+ <path refid="test.base.classpath" />
+ <pathelement path="src/test-files" />
+ </path>
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
</project>
Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java Thu Oct 8 09:21:35 2015
@@ -20,6 +20,9 @@ package org.apache.lucene.analysis.morfo
import java.io.Reader;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.polish.PolishStemmer;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
@@ -30,27 +33,23 @@ import org.apache.lucene.analysis.standa
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
*/
public class MorfologikAnalyzer extends Analyzer {
- private final String dictionary;
+ private final Dictionary dictionary;
/**
- * Builds an analyzer with an explicit dictionary resource.
- *
- * @param dictionaryResource A constant specifying which dictionary to choose. The
- * dictionary resource must be named <code>morfologik/dictionaries/{dictionaryResource}.dict</code>
- * and have an associated <code>.info</code> metadata file. See the Morfologik project
- * for details.
+ * Builds an analyzer with an explicit {@link Dictionary} resource.
*
- * @see <a href="http://morfologik.blogspot.com/">http://morfologik.blogspot.com/</a>
+ * @param dictionary A prebuilt automaton with inflected and base word forms.
+ * @see <a href="https://github.com/morfologik/">https://github.com/morfologik/</a>
*/
- public MorfologikAnalyzer(final String dictionaryResource) {
- this.dictionary = dictionaryResource;
+ public MorfologikAnalyzer(final Dictionary dictionary) {
+ this.dictionary = dictionary;
}
/**
* Builds an analyzer with the default Morfologik's Polish dictionary.
*/
public MorfologikAnalyzer() {
- this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+ this(new PolishStemmer().getDictionary());
}
/**
Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java Thu Oct 8 09:21:35 2015
@@ -18,17 +18,16 @@ package org.apache.lucene.analysis.morfo
*/
import java.io.IOException;
-import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.Objects;
import java.util.regex.Pattern;
import morfologik.stemming.Dictionary;
import morfologik.stemming.DictionaryLookup;
import morfologik.stemming.IStemmer;
import morfologik.stemming.WordData;
+import morfologik.stemming.polish.PolishStemmer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -70,21 +69,10 @@ public class MorfologikFilter extends To
* Creates a filter with the default (Polish) dictionary.
*/
public MorfologikFilter(final TokenStream in) {
- this(in, DictionaryHolder.DEFAULT_DICT);
+ this(in, new PolishStemmer().getDictionary());
}
/**
- * Creates a filter with a given dictionary resource.
- *
- * @param in input token stream.
- * @param dictResource Dictionary resource name in classpath.
- */
- public MorfologikFilter(final TokenStream in, final String dictResource) {
- this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE.equals(dictResource) ?
- DictionaryHolder.DEFAULT_DICT : loadDictionaryResource(dictResource));
- }
-
- /**
* Creates a filter with a given dictionary.
*
* @param in input token stream.
@@ -180,23 +168,4 @@ public class MorfologikFilter extends To
tagsList.clear();
super.reset();
}
-
- /** This method was added, because Morfologik uses context classloader and fails to load from our classloader (bug with absolute path). */
- static Dictionary loadDictionaryResource(String resource) {
- Objects.requireNonNull(resource, "Morfologik language code may not be null");
- final String dictPath = "/morfologik/dictionaries/" + resource + ".dict";
- final String metaPath = Dictionary.getExpectedFeaturesName(dictPath);
-
- try (final InputStream dictIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(dictPath), "Unable to find Morfologik dictionary: " + dictPath);
- final InputStream metaIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(metaPath), "Unable to find Morfologik metadata: " + metaPath)) {
- return Dictionary.readAndClose(dictIn, metaIn);
- } catch (IOException ioe) {
- throw new RuntimeException("IOException while loading Morfologik dictionary and metadata.", ioe);
- }
- }
-
- /** This holder is for the default Polish dictionary */
- static final class DictionaryHolder {
- static final Dictionary DEFAULT_DICT = loadDictionaryResource(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
- }
}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java Thu Oct 8 09:21:35 2015
@@ -19,11 +19,12 @@ package org.apache.lucene.analysis.morfo
import java.io.IOException;
import java.io.InputStream;
-import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryMetadata;
+import morfologik.stemming.polish.PolishStemmer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceLoader;
@@ -31,87 +32,47 @@ import org.apache.lucene.analysis.util.R
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
- * Filter factory for {@link MorfologikFilter}. For backward compatibility polish
- * dictionary is used as default. You can change dictionary resource
- * by dictionary-resource parameter:
- * <pre class="prettyprint">
- * <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100">
- * <analyzer>
- * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- * <filter class="solr.MorfologikFilterFactory" dictionary-resource="pl" />
- * </analyzer>
- * </fieldType></pre>
+ * Filter factory for {@link MorfologikFilter}.
*
- * <p>Alternatively, you can pass in the filenames of FSA ({@code ".dict"} and features "{@code ".info"}" file
- * (if the features file is not given, its name is derived from the FSA file):
+ * <p>An explicit resource name of the dictionary ({@code ".dict"}) can be
+ * provided via the <code>dictionary</code> attribute, as the example below demonstrates:
* <pre class="prettyprint">
- * <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100">
+ * <fieldType name="text_mylang" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- * <filter class="solr.MorfologikFilterFactory" dictionary-fsa-file="mylang.dict" dictionary-features-file="mylang.info" />
+ * <filter class="solr.MorfologikFilterFactory" dictionary="mylang.dict" />
* </analyzer>
* </fieldType></pre>
*
+ * <p>If the dictionary attribute is not provided, the Polish dictionary is loaded
+ * and used by default.
+ *
* @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
*/
public class MorfologikFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
- /**
- * The default dictionary resource (for Polish).
- */
- public static final String DEFAULT_DICTIONARY_RESOURCE = "pl";
-
- /** Schema attribute. */
- @Deprecated
- public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
-
- /** Dictionary resource */
- public static final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
-
- /** Dictionary FSA file (should have {@code ".dict"} suffix), loaded from {@link ResourceLoader}. */
- public static final String DICTIONARY_FSA_FILE_ATTRIBUTE = "dictionary-fsa-file";
-
- /** Dictionary features/properties file, loaded from {@link ResourceLoader}. If not given, this
- * loads the file with same name like {@link #DICTIONARY_FSA_FILE_ATTRIBUTE}, but with
- * {@code ".info"} suffix.
- */
- public static final String DICTIONARY_FEATURES_FILE_ATTRIBUTE = "dictionary-features-file";
+ /** Dictionary resource attribute (should have {@code ".dict"} suffix), loaded from {@link ResourceLoader}. */
+ public static final String DICTIONARY_ATTRIBUTE = "dictionary";
+
+ /** {@link #DICTIONARY_ATTRIBUTE} value passed to {@link #inform}. */
+ private String resourceName;
- private final String dictionaryFsaFile, dictionaryFeaturesFile, dictionaryResource;
- private Dictionary dictionary; // initialized on inform()
+ /** Loaded {@link Dictionary}, initialized on {@link #inform(ResourceLoader)}. */
+ private Dictionary dictionary;
/** Creates a new MorfologikFilterFactory */
public MorfologikFilterFactory(Map<String,String> args) {
super(args);
// Be specific about no-longer-supported dictionary attribute.
- String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
- if (dictionaryName != null && !dictionaryName.isEmpty()) {
- throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
- + "longer supported (Morfologik now offers one unified Polish dictionary): " + dictionaryName
- + ". Perhaps you wanted to use 'dictionary-resource' attribute instead?");
+ final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
+ String dictionaryResource = get(args, DICTIONARY_RESOURCE_ATTRIBUTE);
+ if (dictionaryResource != null && !dictionaryResource.isEmpty()) {
+ throw new IllegalArgumentException("The " + DICTIONARY_RESOURCE_ATTRIBUTE + " attribute is no "
+ + "longer supported. Use the '" + DICTIONARY_ATTRIBUTE + "' attribute instead (see LUCENE-6833).");
}
- // first check FSA and features (at least FSA must be given, features name is guessed):
- dictionaryFsaFile = get(args, DICTIONARY_FSA_FILE_ATTRIBUTE);
- dictionaryFeaturesFile = get(args, DICTIONARY_FEATURES_FILE_ATTRIBUTE,
- (dictionaryFsaFile == null) ? null : Dictionary.getExpectedFeaturesName(dictionaryFsaFile));
-
- if (dictionaryFsaFile == null && dictionaryFeaturesFile == null) {
- // if we have no FSA/features combination, we resolve the classpath resource:
- dictionaryResource = get(args, DICTIONARY_RESOURCE_ATTRIBUTE, DEFAULT_DICTIONARY_RESOURCE);
- } else if (dictionaryFsaFile == null || dictionaryFeaturesFile == null) {
- // if we have incomplete FSA/features tuple in args
- throw new IllegalArgumentException(String.format(Locale.ENGLISH, "Missing '%s' or '%s' attribute.",
- DICTIONARY_FSA_FILE_ATTRIBUTE, DICTIONARY_FEATURES_FILE_ATTRIBUTE));
- } else {
- dictionaryResource = null;
- if (get(args, DICTIONARY_RESOURCE_ATTRIBUTE) != null) {
- // fail if both is given: FSA/features files + classpath resource
- throw new IllegalArgumentException(String.format(Locale.ENGLISH, "Cannot give '%s' and '%s'/'%s' at the same time.",
- DICTIONARY_RESOURCE_ATTRIBUTE, DICTIONARY_FSA_FILE_ATTRIBUTE, DICTIONARY_FEATURES_FILE_ATTRIBUTE));
- }
- }
-
+ resourceName = get(args, DICTIONARY_ATTRIBUTE);
+
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -119,16 +80,14 @@ public class MorfologikFilterFactory ext
@Override
public void inform(ResourceLoader loader) throws IOException {
- if (dictionaryFsaFile != null) {
- assert dictionaryFeaturesFile != null;
- assert dictionaryResource == null;
- try (final InputStream dictIn = loader.openResource(dictionaryFsaFile);
- final InputStream metaIn = loader.openResource(dictionaryFeaturesFile)) {
- this.dictionary = Dictionary.readAndClose(dictIn, metaIn);
- }
+ if (resourceName == null) {
+ // Get the dictionary lazily, does not hold up memory.
+ this.dictionary = new PolishStemmer().getDictionary();
} else {
- assert dictionaryResource != null;
- this.dictionary = MorfologikFilter.loadDictionaryResource(dictionaryResource);
+ try (InputStream dict = loader.openResource(resourceName);
+ InputStream meta = loader.openResource(DictionaryMetadata.getExpectedMetadataFileName(resourceName))) {
+ this.dictionary = Dictionary.read(dict, meta);
+ }
}
}
Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Thu Oct 8 09:21:35 2015
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.morfo
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.InputStream;
import java.io.StringReader;
import java.util.Collections;
import java.util.HashMap;
@@ -31,51 +33,59 @@ import org.apache.lucene.analysis.util.R
* Test for {@link MorfologikFilterFactory}.
*/
public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
- final ResourceLoader loader = new ClasspathResourceLoader(getClass());
+ private static class ForbidResourcesLoader implements ResourceLoader {
+ @Override
+ public InputStream openResource(String resource) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public <T> T newInstance(String cname, Class<T> expectedType) {
+ throw new UnsupportedOperationException();
+ }
+ }
public void testDefaultDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
- factory.inform(loader);
+ factory.inform(new ForbidResourcesLoader());
TokenStream stream = whitespaceMockTokenizer(reader);
stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
}
-
- public void testResourceDictionary() throws Exception {
- StringReader reader = new StringReader("rowery bilety");
- Map<String,String> params = new HashMap<>();
- params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
- MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
- factory.inform(loader);
- TokenStream stream = whitespaceMockTokenizer(reader);
- stream = factory.create(stream);
- assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
- }
-
- public void testResourceLoaderDictionary1() throws Exception {
- StringReader reader = new StringReader("rowery bilety");
+
+ public void testExplicitDictionary() throws Exception {
+ final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
+
+ StringReader reader = new StringReader("inflected1 inflected2");
Map<String,String> params = new HashMap<>();
- params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
+ params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");
MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
factory.inform(loader);
TokenStream stream = whitespaceMockTokenizer(reader);
stream = factory.create(stream);
- assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
+ assertTokenStreamContents(stream, new String[] {"lemma1", "lemma2"});
}
-
- public void testResourceLoaderDictionary2() throws Exception {
- StringReader reader = new StringReader("rowery bilety");
- Map<String,String> params = new HashMap<>();
- params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
- params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
- MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
- factory.inform(loader);
- TokenStream stream = whitespaceMockTokenizer(reader);
- stream = factory.create(stream);
- assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
+
+ public void testMissingDictionary() throws Exception {
+ final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
+
+ try {
+ Map<String,String> params = new HashMap<>();
+ params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");
+ MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
+ factory.inform(loader);
+ fail();
+ } catch (IOException e) {
+ assertTrue(e.getMessage().contains("Resource not found"));
+ }
}
-
+
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
try {
@@ -87,40 +97,4 @@ public class TestMorfologikFilterFactory
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
-
- public void testIncompatibleArgs1() throws Exception {
- try {
- HashMap<String,String> params = new HashMap<String,String>();
- params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
- params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
- new MorfologikFilterFactory(params);
- fail();
- } catch (IllegalArgumentException expected) {
- assertTrue(expected.getMessage().contains("at the same time"));
- }
- }
-
- public void testIncompatibleArgs2() throws Exception {
- try {
- HashMap<String,String> params = new HashMap<String,String>();
- params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
- params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
- params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
- new MorfologikFilterFactory(params);
- fail();
- } catch (IllegalArgumentException expected) {
- assertTrue(expected.getMessage().contains("at the same time"));
- }
- }
-
- public void testMissingArgs1() throws Exception {
- try {
- HashMap<String,String> params = new HashMap<String,String>();
- params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
- new MorfologikFilterFactory(params);
- fail();
- } catch (IllegalArgumentException expected) {
- assertTrue(expected.getMessage().contains("Missing"));
- }
- }
}
Modified: lucene/dev/branches/branch_5x/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/ivy-versions.properties?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/branch_5x/lucene/ivy-versions.properties Thu Oct 8 09:21:35 2015
@@ -211,7 +211,7 @@ org.bouncycastle.version = 1.45
/org.carrot2/carrot2-mini = 3.10.3
-org.carrot2.morfologik.version = 1.10.0
+org.carrot2.morfologik.version = 2.0.1
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
/org.carrot2/morfologik-polish = ${org.carrot2.morfologik.version}
/org.carrot2/morfologik-stemming = ${org.carrot2.morfologik.version}
Modified: lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt Thu Oct 8 09:21:35 2015
@@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
Modified: lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt Thu Oct 8 09:21:35 2015
@@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,