You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2015/10/08 11:21:36 UTC

svn commit: r1707458 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/analysis/morfologik/ lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/ lucene/analysis/morfologik/src/test-files/ lucene/analysis/morfologik/src/test/o...

Author: dweiss
Date: Thu Oct  8 09:21:35 2015
New Revision: 1707458

URL: http://svn.apache.org/viewvc?rev=1707458&view=rev
Log:
LUCENE-6833: Upgraded Morfologik to version 2.0.1. The 'dictionary' attribute has been
restored; it now names the dictionary resource to load in place of the default Polish dictionary.


Added:
    lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test-files/
      - copied from r1707457, lucene/dev/trunk/lucene/analysis/morfologik/src/test-files/
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-2.0.1.jar.sha1
      - copied unchanged from r1707457, lucene/dev/trunk/lucene/licenses/morfologik-fsa-2.0.1.jar.sha1
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-polish-2.0.1.jar.sha1
      - copied unchanged from r1707457, lucene/dev/trunk/lucene/licenses/morfologik-polish-2.0.1.jar.sha1
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-2.0.1.jar.sha1
      - copied unchanged from r1707457, lucene/dev/trunk/lucene/licenses/morfologik-stemming-2.0.1.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/morfologik-fsa-2.0.1.jar.sha1
      - copied unchanged from r1707457, lucene/dev/trunk/solr/licenses/morfologik-fsa-2.0.1.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/morfologik-polish-2.0.1.jar.sha1
      - copied unchanged from r1707457, lucene/dev/trunk/solr/licenses/morfologik-polish-2.0.1.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/morfologik-stemming-2.0.1.jar.sha1
      - copied unchanged from r1707457, lucene/dev/trunk/solr/licenses/morfologik-stemming-2.0.1.jar.sha1
Removed:
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-1.10.0.jar.sha1
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-polish-1.10.0.jar.sha1
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-1.10.0.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/morfologik-fsa-1.10.0.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/morfologik-polish-1.10.0.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/morfologik-stemming-1.10.0.jar.sha1
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml
    lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
    lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
    lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
    lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
    lucene/dev/branches/branch_5x/lucene/ivy-versions.properties   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
    lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/   (props changed)

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu Oct  8 09:21:35 2015
@@ -152,8 +152,12 @@ Other
 * LUCENE-6761: MatchAllDocsQuery's Scorers do not expose approximations
   anymore. (Adrien Grand)
 
-* LUCENE-6775: Improved MorfologikFilterFactory to allow loading of
-  custom dictionaries from ResourceLoader.  (Uwe Schindler)
+* LUCENE-6775, LUCENE-6833: Improved MorfologikFilterFactory to allow 
+  loading of custom dictionaries from ResourceLoader. Upgraded 
+  Morfologik to version 2.0.1. The 'dictionary' attribute has been
+  reverted back and now points at the dictionary resource to be 
+  loaded instead of the default Polish dictionary.
+  (Uwe Schindler, Dawid Weiss)
 
 * LUCENE-6797: Make GeoCircle an interface and use a factory to create
   it, to eventually handle degenerate cases (Karl Wright via Mike

Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/build.xml Thu Oct  8 09:21:35 2015
@@ -18,9 +18,8 @@
  -->
 
 <project name="analyzers-morfologik" default="default">
-
   <description>
-    Analyzer for indexing Polish
+    Analyzer for dictionary stemming, built-in Polish dictionary
   </description>
 
   <import file="../analysis-module-build.xml"/>
@@ -30,6 +29,12 @@
     <fileset dir="lib"/>
     <path refid="base.classpath"/>
   </path>
+  
+  
+  <path id="test.classpath">
+    <path refid="test.base.classpath" />
+    <pathelement path="src/test-files" />
+  </path>
 
   <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
 </project>

Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java Thu Oct  8 09:21:35 2015
@@ -20,6 +20,9 @@ package org.apache.lucene.analysis.morfo
 
 import java.io.Reader;
 
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.polish.PolishStemmer;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
@@ -30,27 +33,23 @@ import org.apache.lucene.analysis.standa
  * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
  */
 public class MorfologikAnalyzer extends Analyzer {
-  private final String dictionary;
+  private final Dictionary dictionary;
 
   /**
-   * Builds an analyzer with an explicit dictionary resource.
-   * 
-   * @param dictionaryResource A constant specifying which dictionary to choose. The
-   * dictionary resource must be named <code>morfologik/dictionaries/{dictionaryResource}.dict</code>
-   * and have an associated <code>.info</code> metadata file. See the Morfologik project
-   * for details.
+   * Builds an analyzer with an explicit {@link Dictionary} resource.
    * 
-   * @see <a href="http://morfologik.blogspot.com/">http://morfologik.blogspot.com/</a>
+   * @param dictionary A prebuilt automaton with inflected and base word forms.
+   * @see <a href="https://github.com/morfologik/">https://github.com/morfologik/</a>
    */
-  public MorfologikAnalyzer(final String dictionaryResource) {
-    this.dictionary = dictionaryResource;
+  public MorfologikAnalyzer(final Dictionary dictionary) {
+    this.dictionary = dictionary;
   }
   
   /**
    * Builds an analyzer with the default Morfologik's Polish dictionary.
    */
   public MorfologikAnalyzer() {
-    this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+    this(new PolishStemmer().getDictionary());
   }
 
   /**

Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java Thu Oct  8 09:21:35 2015
@@ -18,17 +18,16 @@ package org.apache.lucene.analysis.morfo
  */
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.Objects;
 import java.util.regex.Pattern;
 
 import morfologik.stemming.Dictionary;
 import morfologik.stemming.DictionaryLookup;
 import morfologik.stemming.IStemmer;
 import morfologik.stemming.WordData;
+import morfologik.stemming.polish.PolishStemmer;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -70,21 +69,10 @@ public class MorfologikFilter extends To
    * Creates a filter with the default (Polish) dictionary.
    */
   public MorfologikFilter(final TokenStream in) {
-    this(in, DictionaryHolder.DEFAULT_DICT);
+    this(in, new PolishStemmer().getDictionary());
   }
 
   /**
-   * Creates a filter with a given dictionary resource.
-   *
-   * @param in input token stream.
-   * @param dictResource Dictionary resource name in classpath.
-   */
-  public MorfologikFilter(final TokenStream in, final String dictResource) {
-    this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE.equals(dictResource) ?
-        DictionaryHolder.DEFAULT_DICT : loadDictionaryResource(dictResource));
-  }
-  
-  /**
    * Creates a filter with a given dictionary.
    *
    * @param in input token stream.
@@ -180,23 +168,4 @@ public class MorfologikFilter extends To
     tagsList.clear();
     super.reset();
   }
-  
-  /** This method was added, because Morfologik uses context classloader and fails to load from our classloader (bug with absolute path). */
-  static Dictionary loadDictionaryResource(String resource) {
-    Objects.requireNonNull(resource, "Morfologik language code may not be null");
-    final String dictPath = "/morfologik/dictionaries/" + resource + ".dict";
-    final String metaPath = Dictionary.getExpectedFeaturesName(dictPath);
-
-    try (final InputStream dictIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(dictPath), "Unable to find Morfologik dictionary: " + dictPath);
-        final InputStream metaIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(metaPath), "Unable to find Morfologik metadata: " + metaPath)) {
-      return Dictionary.readAndClose(dictIn, metaIn);
-    } catch (IOException ioe) {
-      throw new RuntimeException("IOException while loading Morfologik dictionary and metadata.", ioe);
-    }
-  }
-
-  /** This holder is for the default Polish dictionary */
-  static final class DictionaryHolder {
-    static final Dictionary DEFAULT_DICT = loadDictionaryResource(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
-  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java Thu Oct  8 09:21:35 2015
@@ -19,11 +19,12 @@ package org.apache.lucene.analysis.morfo
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 
 import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryMetadata;
+import morfologik.stemming.polish.PolishStemmer;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.ResourceLoader;
@@ -31,87 +32,47 @@ import org.apache.lucene.analysis.util.R
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
- * Filter factory for {@link MorfologikFilter}. For backward compatibility polish
- * dictionary is used as default. You can change dictionary resource 
- * by dictionary-resource parameter:
- * <pre class="prettyprint">
- * &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
- *   &lt;analyzer&gt;
- *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- *     &lt;filter class="solr.MorfologikFilterFactory" dictionary-resource="pl" /&gt;
- *   &lt;/analyzer&gt;
- * &lt;/fieldType&gt;</pre>
+ * Filter factory for {@link MorfologikFilter}. 
  * 
- * <p>Alternatively, you can pass in the filenames of FSA ({@code ".dict"} and features "{@code ".info"}" file
- * (if the features file is not given, its name is derived from the FSA file):
+ * <p>An explicit resource name of the dictionary ({@code ".dict"}) can be 
+ * provided via the <code>dictionary</code> attribute, as the example below demonstrates:
  * <pre class="prettyprint">
- * &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
+ * &lt;fieldType name="text_mylang" class="solr.TextField" positionIncrementGap="100"&gt;
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- *     &lt;filter class="solr.MorfologikFilterFactory" dictionary-fsa-file="mylang.dict" dictionary-features-file="mylang.info" /&gt;
+ *     &lt;filter class="solr.MorfologikFilterFactory" dictionary="mylang.dict" /&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
  * 
+ * <p>If the dictionary attribute is not provided, the Polish dictionary is loaded
+ * and used by default. 
+ * 
  * @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
  */
 public class MorfologikFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
-  /**
-   * The default dictionary resource (for Polish). 
-   */
-  public static final String DEFAULT_DICTIONARY_RESOURCE = "pl";
-
-  /** Schema attribute. */
-  @Deprecated
-  public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
-
-  /** Dictionary resource */
-  public static final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
-
-  /** Dictionary FSA file (should have {@code ".dict"} suffix), loaded from {@link ResourceLoader}. */
-  public static final String DICTIONARY_FSA_FILE_ATTRIBUTE = "dictionary-fsa-file";
-
-  /** Dictionary features/properties file, loaded from {@link ResourceLoader}. If not given, this
-   * loads the file with same name like {@link #DICTIONARY_FSA_FILE_ATTRIBUTE}, but with
-   * {@code ".info"} suffix.
-   */
-  public static final String DICTIONARY_FEATURES_FILE_ATTRIBUTE = "dictionary-features-file";
+  /** Dictionary resource attribute (should have {@code ".dict"} suffix), loaded from {@link ResourceLoader}. */
+  public static final String DICTIONARY_ATTRIBUTE = "dictionary";
+
+  /** {@link #DICTIONARY_ATTRIBUTE} value passed to {@link #inform}. */
+  private String resourceName;
 
-  private final String dictionaryFsaFile, dictionaryFeaturesFile, dictionaryResource;
-  private Dictionary dictionary; // initialized on inform()
+  /** Loaded {@link Dictionary}, initialized on {@link #inform(ResourceLoader)}. */
+  private Dictionary dictionary;
 
   /** Creates a new MorfologikFilterFactory */
   public MorfologikFilterFactory(Map<String,String> args) {
     super(args);
 
     // Be specific about no-longer-supported dictionary attribute.
-    String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
-    if (dictionaryName != null && !dictionaryName.isEmpty()) {
-      throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
-          + "longer supported (Morfologik now offers one unified Polish dictionary): " + dictionaryName
-          + ". Perhaps you wanted to use 'dictionary-resource' attribute instead?");
+    final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
+    String dictionaryResource = get(args, DICTIONARY_RESOURCE_ATTRIBUTE);
+    if (dictionaryResource != null && !dictionaryResource.isEmpty()) {
+      throw new IllegalArgumentException("The " + DICTIONARY_RESOURCE_ATTRIBUTE + " attribute is no "
+          + "longer supported. Use the '" + DICTIONARY_ATTRIBUTE + "' attribute instead (see LUCENE-6833).");
     }
 
-    // first check FSA and features (at least FSA must be given, features name is guessed):
-    dictionaryFsaFile = get(args, DICTIONARY_FSA_FILE_ATTRIBUTE);
-    dictionaryFeaturesFile = get(args, DICTIONARY_FEATURES_FILE_ATTRIBUTE,
-        (dictionaryFsaFile == null) ? null : Dictionary.getExpectedFeaturesName(dictionaryFsaFile));
-    
-    if (dictionaryFsaFile == null && dictionaryFeaturesFile == null) {
-      // if we have no FSA/features combination, we resolve the classpath resource:
-      dictionaryResource = get(args, DICTIONARY_RESOURCE_ATTRIBUTE, DEFAULT_DICTIONARY_RESOURCE);
-    } else if (dictionaryFsaFile == null || dictionaryFeaturesFile == null) {
-      // if we have incomplete FSA/features tuple in args
-      throw new IllegalArgumentException(String.format(Locale.ENGLISH, "Missing '%s' or '%s' attribute.",
-          DICTIONARY_FSA_FILE_ATTRIBUTE, DICTIONARY_FEATURES_FILE_ATTRIBUTE));      
-    } else {
-      dictionaryResource = null;
-      if (get(args, DICTIONARY_RESOURCE_ATTRIBUTE) != null) {
-        // fail if both is given: FSA/features files + classpath resource
-        throw new IllegalArgumentException(String.format(Locale.ENGLISH, "Cannot give '%s' and '%s'/'%s' at the same time.",
-            DICTIONARY_RESOURCE_ATTRIBUTE, DICTIONARY_FSA_FILE_ATTRIBUTE, DICTIONARY_FEATURES_FILE_ATTRIBUTE));
-      }
-    }
-    
+    resourceName = get(args, DICTIONARY_ATTRIBUTE);
+
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -119,16 +80,14 @@ public class MorfologikFilterFactory ext
 
   @Override
   public void inform(ResourceLoader loader) throws IOException {
-    if (dictionaryFsaFile != null) {
-      assert dictionaryFeaturesFile != null;
-      assert dictionaryResource == null;
-      try (final InputStream dictIn = loader.openResource(dictionaryFsaFile);
-          final InputStream metaIn = loader.openResource(dictionaryFeaturesFile)) {
-        this.dictionary = Dictionary.readAndClose(dictIn, metaIn);
-      }
+    if (resourceName == null) {
+      // Get the dictionary lazily, does not hold up memory.
+      this.dictionary = new PolishStemmer().getDictionary();
     } else {
-      assert dictionaryResource != null;
-      this.dictionary = MorfologikFilter.loadDictionaryResource(dictionaryResource);
+      try (InputStream dict = loader.openResource(resourceName);
+           InputStream meta = loader.openResource(DictionaryMetadata.getExpectedMetadataFileName(resourceName))) {
+        this.dictionary = Dictionary.read(dict, meta);
+      }
     }
   }
 

Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Thu Oct  8 09:21:35 2015
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.morfo
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.InputStream;
 import java.io.StringReader;
 import java.util.Collections;
 import java.util.HashMap;
@@ -31,51 +33,59 @@ import org.apache.lucene.analysis.util.R
  * Test for {@link MorfologikFilterFactory}.
  */
 public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
-  final ResourceLoader loader = new ClasspathResourceLoader(getClass());
+  private static class ForbidResourcesLoader implements ResourceLoader {
+    @Override
+    public InputStream openResource(String resource) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public <T> T newInstance(String cname, Class<T> expectedType) {
+      throw new UnsupportedOperationException();
+    }
+  }
 
   public void testDefaultDictionary() throws Exception {
     StringReader reader = new StringReader("rowery bilety");
     MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
-    factory.inform(loader);
+    factory.inform(new ForbidResourcesLoader());
     TokenStream stream = whitespaceMockTokenizer(reader);
     stream = factory.create(stream);
     assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
   }
-  
-  public void testResourceDictionary() throws Exception {
-    StringReader reader = new StringReader("rowery bilety");
-    Map<String,String> params = new HashMap<>();
-    params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
-    MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
-    factory.inform(loader);
-    TokenStream stream = whitespaceMockTokenizer(reader);
-    stream = factory.create(stream);
-    assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
-  }
-  
-  public void testResourceLoaderDictionary1() throws Exception {
-    StringReader reader = new StringReader("rowery bilety");
+
+  public void testExplicitDictionary() throws Exception {
+    final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
+
+    StringReader reader = new StringReader("inflected1 inflected2");
     Map<String,String> params = new HashMap<>();
-    params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
+    params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");
     MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
     factory.inform(loader);
     TokenStream stream = whitespaceMockTokenizer(reader);
     stream = factory.create(stream);
-    assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
+    assertTokenStreamContents(stream, new String[] {"lemma1", "lemma2"});
   }
-  
-  public void testResourceLoaderDictionary2() throws Exception {
-    StringReader reader = new StringReader("rowery bilety");
-    Map<String,String> params = new HashMap<>();
-    params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
-    params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
-    MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
-    factory.inform(loader);
-    TokenStream stream = whitespaceMockTokenizer(reader);
-    stream = factory.create(stream);
-    assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
+
+  public void testMissingDictionary() throws Exception {
+    final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
+
+    try {
+      Map<String,String> params = new HashMap<>();
+      params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");
+      MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
+      factory.inform(loader);
+      fail();
+    } catch (IOException e) {
+      assertTrue(e.getMessage().contains("Resource not found"));
+    }
   }
-  
+
   /** Test that bogus arguments result in exception */
   public void testBogusArguments() throws Exception {
     try {
@@ -87,40 +97,4 @@ public class TestMorfologikFilterFactory
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
-  
-  public void testIncompatibleArgs1() throws Exception {
-    try {
-      HashMap<String,String> params = new HashMap<String,String>();
-      params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
-      params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
-      new MorfologikFilterFactory(params);
-      fail();
-    } catch (IllegalArgumentException expected) {
-      assertTrue(expected.getMessage().contains("at the same time"));
-    }
-  }
-  
-  public void testIncompatibleArgs2() throws Exception {
-    try {
-      HashMap<String,String> params = new HashMap<String,String>();
-      params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
-      params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
-      params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
-      new MorfologikFilterFactory(params);
-      fail();
-    } catch (IllegalArgumentException expected) {
-      assertTrue(expected.getMessage().contains("at the same time"));
-    }
-  }
-  
-  public void testMissingArgs1() throws Exception {
-    try {
-      HashMap<String,String> params = new HashMap<String,String>();
-      params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
-      new MorfologikFilterFactory(params);
-      fail();
-    } catch (IllegalArgumentException expected) {
-      assertTrue(expected.getMessage().contains("Missing"));
-    }
-  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/ivy-versions.properties?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/branch_5x/lucene/ivy-versions.properties Thu Oct  8 09:21:35 2015
@@ -211,7 +211,7 @@ org.bouncycastle.version = 1.45
 
 /org.carrot2/carrot2-mini = 3.10.3
 
-org.carrot2.morfologik.version = 1.10.0
+org.carrot2.morfologik.version = 2.0.1
 /org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
 /org.carrot2/morfologik-polish = ${org.carrot2.morfologik.version}
 /org.carrot2/morfologik-stemming = ${org.carrot2.morfologik.version}

Modified: lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt Thu Oct  8 09:21:35 2015
@@ -1,6 +1,6 @@
 
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, 

Modified: lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt?rev=1707458&r1=1707457&r2=1707458&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt Thu Oct  8 09:21:35 2015
@@ -1,6 +1,6 @@
 
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification,