You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2015/09/02 22:55:29 UTC
svn commit: r1700904 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/CHANGES.txt
lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
Author: uschindler
Date: Wed Sep 2 20:55:29 2015
New Revision: 1700904
URL: http://svn.apache.org/r1700904
Log:
Merged revision(s) 1700903 from lucene/dev/trunk:
LUCENE-6774: Remove classloader hack in MorfologikFilter #2
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1700904&r1=1700903&r2=1700904&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Wed Sep 2 20:55:29 2015
@@ -57,7 +57,8 @@ Bug Fixes
* LUCENE-6748: UsageTrackingQueryCachingPolicy no longer caches trivial queries
like MatchAllDocsQuery. (Adrien Grand)
-* LUCENE-6774: Remove solr hack in MorfologikFilter. (Robert Muir)
+* LUCENE-6774: Remove classloader hack in MorfologikFilter. (Robert Muir,
+ Uwe Schindler)
Other
Modified: lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java?rev=1700904&r1=1700903&r2=1700904&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java Wed Sep 2 20:55:29 2015
@@ -1,4 +1,3 @@
-// -*- c-basic-offset: 2 -*-
package org.apache.lucene.analysis.morfologik;
/*
@@ -19,10 +18,17 @@ package org.apache.lucene.analysis.morfo
*/
import java.io.IOException;
-import java.util.*;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
import java.util.regex.Pattern;
-import morfologik.stemming.*;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -30,7 +36,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.util.*;
+import org.apache.lucene.util.CharsRefBuilder;
/**
* {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
@@ -64,22 +70,33 @@ public class MorfologikFilter extends To
* Creates a filter with the default (Polish) dictionary.
*/
public MorfologikFilter(final TokenStream in) {
- this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+ this(in, DictionaryHolder.DEFAULT_DICT);
}
/**
* Creates a filter with a given dictionary resource.
*
* @param in input token stream.
- * @param dict Dictionary resource from classpath.
+ * @param dictResource Dictionary resource name in classpath.
*/
- public MorfologikFilter(final TokenStream in, final String dict) {
+ public MorfologikFilter(final TokenStream in, final String dictResource) {
+ this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE.equals(dictResource) ?
+ DictionaryHolder.DEFAULT_DICT : loadDictionaryResource(dictResource));
+ }
+
+ /**
+ * Creates a filter with a given dictionary.
+ *
+ * @param in input token stream.
+ * @param dict Dictionary to use for stemming.
+ */
+ public MorfologikFilter(final TokenStream in, final Dictionary dict) {
super(in);
this.input = in;
- this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict));
+ this.stemmer = new DictionaryLookup(dict);
this.lemmaList = Collections.emptyList();
}
-
+
/**
* A pattern used to split lemma forms.
*/
@@ -163,4 +180,23 @@ public class MorfologikFilter extends To
tagsList.clear();
super.reset();
}
+
+ /** This method was added, because Morfologik uses context classloader and fails to load from our classloader (bug with absolute path). */
+ static Dictionary loadDictionaryResource(String resource) {
+ Objects.requireNonNull(resource, "Morfologik language code may not be null");
+ final String dictPath = "/morfologik/dictionaries/" + resource + ".dict";
+ final String metaPath = Dictionary.getExpectedFeaturesName(dictPath);
+
+ try (final InputStream dictIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(dictPath), "Unable to find Morfologik dictionary: " + dictPath);
+ final InputStream metaIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(metaPath), "Unable to find Morfologik metadata: " + metaPath)) {
+ return Dictionary.readAndClose(dictIn, metaIn);
+ } catch (IOException ioe) {
+ throw new RuntimeException("IOException while loading Morfologik dictionary and metadata.", ioe);
+ }
+ }
+
+ /** This holder is for the default Polish dictionary */
+ static final class DictionaryHolder {
+ static final Dictionary DEFAULT_DICT = loadDictionaryResource(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+ }
}