You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2012/03/28 09:33:55 UTC
svn commit: r1306198 - in /lucene/dev/trunk: dev-tools/maven/solr/core/
lucene/contrib/ solr/ solr/contrib/analysis-extras/
solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/
solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/
Author: dweiss
Date: Wed Mar 28 07:33:55 2012
New Revision: 1306198
URL: http://svn.apache.org/viewvc?rev=1306198&view=rev
Log:
SOLR-3272: Solr filter factory for MorfologikFilter (Polish lemmatisation).
(Rafał Kuć via Dawid Weiss, Steven Rowe, Uwe Schindler).
Added:
lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java
lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestMorfologikFilterFactory.java
Modified:
lucene/dev/trunk/dev-tools/maven/solr/core/pom.xml.template
lucene/dev/trunk/lucene/contrib/contrib-build.xml
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/common-build.xml
lucene/dev/trunk/solr/contrib/analysis-extras/build.xml
Modified: lucene/dev/trunk/dev-tools/maven/solr/core/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/maven/solr/core/pom.xml.template?rev=1306198&r1=1306197&r2=1306198&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/maven/solr/core/pom.xml.template (original)
+++ lucene/dev/trunk/dev-tools/maven/solr/core/pom.xml.template Wed Mar 28 07:33:55 2012
@@ -88,6 +88,11 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-morfologik</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-phonetic</artifactId>
<version>${project.version}</version>
</dependency>
Modified: lucene/dev/trunk/lucene/contrib/contrib-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/contrib-build.xml?rev=1306198&r1=1306197&r2=1306198&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/contrib-build.xml (original)
+++ lucene/dev/trunk/lucene/contrib/contrib-build.xml Wed Mar 28 07:33:55 2012
@@ -163,16 +163,31 @@
</target>
<property name="analyzers-uima.jar" value="${common.dir}/../modules/analysis/build/uima/lucene-analyzers-uima-${version}.jar"/>
- <target name="check-analyzers-uima-uptodate" unless="analyzers-uima.uptodate">
- <module-uptodate name="analysis/uima" jarfile="${analyzers-uima.jar}" property="analyzers-uima.uptodate"/>
- </target>
- <target name="jar-analyzers-uima" unless="analyzers-uima.uptodate" depends="check-analyzers-uima-uptodate">
- <ant dir="${common.dir}/../modules/analysis/uima" target="jar-core" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
+ <target name="check-analyzers-uima-uptodate" unless="analyzers-uima.uptodate">
+ <module-uptodate name="analysis/uima" jarfile="${analyzers-uima.jar}" property="analyzers-uima.uptodate"/>
+ </target>
+ <target name="jar-analyzers-uima" unless="analyzers-uima.uptodate" depends="check-analyzers-uima-uptodate">
+ <ant dir="${common.dir}/../modules/analysis/uima" target="jar-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
<property name="analyzers-uima.uptodate" value="true"/>
</target>
+ <property name="analyzers-morfologik.jar" value="${common.dir}/../modules/analysis/build/morfologik/lucene-analyzers-morfologik-${version}.jar"/>
+ <fileset id="analyzers-morfologik.fileset" dir="${common.dir}/../modules/analysis">
+ <include name="build/morfologik/lucene-analyzers-morfologik-${version}.jar" />
+ <include name="morfologik/lib/*.jar" />
+ </fileset>
+ <target name="check-analyzers-morfologik-uptodate" unless="analyzers-morfologik.uptodate">
+ <module-uptodate name="analysis/morfologik" jarfile="${analyzers-morfologik.jar}" property="analyzers-morfologik.uptodate"/>
+ </target>
+ <target name="jar-analyzers-morfologik" unless="analyzers-morfologik.uptodate" depends="check-analyzers-morfologik-uptodate">
+ <ant dir="${common.dir}/../modules/analysis/morfologik" target="jar-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="analyzers-morfologik.uptodate" value="true"/>
+ </target>
+
<property name="grouping.jar" value="${common.dir}/../modules/grouping/build/lucene-grouping-${version}.jar"/>
<target name="check-grouping-uptodate" unless="grouping.uptodate">
<module-uptodate name="grouping" jarfile="${grouping.jar}" property="grouping.uptodate"/>
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1306198&r1=1306197&r2=1306198&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Mar 28 07:33:55 2012
@@ -1,4 +1,4 @@
- Apache Solr Release Notes
+ Apache Solr Release Notes
Introduction
------------
@@ -74,6 +74,9 @@ Detailed Change List
New Features
----------------------
+* SOLR-3272: Solr filter factory for MorfologikFilter (Polish lemmatisation).
+ (Rafał Kuć via Dawid Weiss, Steven Rowe, Uwe Schindler).
+
* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now
supports "percentages" which get evaluated relative the current size of
the cache when warming happens.
Modified: lucene/dev/trunk/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/common-build.xml?rev=1306198&r1=1306197&r2=1306198&view=diff
==============================================================================
--- lucene/dev/trunk/solr/common-build.xml (original)
+++ lucene/dev/trunk/solr/common-build.xml Wed Mar 28 07:33:55 2012
@@ -133,7 +133,7 @@
</target>
<target name="prep-lucene-jars"
- depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-suggest, jar-highlighter, jar-memory,
+ depends="jar-lucene-core, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-morfologik, jar-suggest, jar-highlighter, jar-memory,
jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser">
<property name="solr.deps.compiled" value="true"/>
</target>
@@ -157,6 +157,7 @@
<fileset file="${memory.jar}" />
<fileset file="${misc.jar}" />
<fileset file="${spatial.jar}" />
+ <fileset refid="analyzers-morfologik.fileset" />
</copy>
</sequential>
</target>
Modified: lucene/dev/trunk/solr/contrib/analysis-extras/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/build.xml?rev=1306198&r1=1306197&r2=1306198&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/build.xml (original)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/build.xml Wed Mar 28 07:33:55 2012
@@ -29,19 +29,21 @@
<pathelement path="${analyzers-icu.jar}"/>
<pathelement path="${analyzers-smartcn.jar}"/>
<pathelement path="${analyzers-stempel.jar}"/>
+ <fileset refid="analyzers-morfologik.fileset" />
<path refid="solr.base.classpath"/>
</path>
<target name="module-jars-to-solr"
- depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel">
+ depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel, jar-analyzers-morfologik">
<mkdir dir="${build.dir}/lucene-libs"/>
<copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
<fileset file="${analyzers-icu.jar}"/>
<fileset file="${analyzers-smartcn.jar}"/>
<fileset file="${analyzers-stempel.jar}"/>
+ <fileset refid="analyzers-morfologik.fileset" />
</copy>
</target>
- <target name="compile-core" depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel, solr-contrib-build.compile-core"/>
+ <target name="compile-core" depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel, jar-analyzers-morfologik, solr-contrib-build.compile-core"/>
<target name="dist" depends="module-jars-to-solr, common-solr.dist"/>
</project>
Added: lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java?rev=1306198&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java (added)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java Wed Mar 28 07:33:55 2012
@@ -0,0 +1,81 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.Map;
+
+import morfologik.stemming.PolishStemmer.DICTIONARY;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.morfologik.MorfologikFilter;
+
+/**
+ * Filter factory for {@link MorfologikFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" /&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * <p>Any of the Morfologik dictionaries can be used; at the moment these are:
+ * <code>MORFOLOGIK</code> (Morfologik's original dictionary),
+ * <code>MORFEUSZ</code> (Morfeusz-SIAT),
+ * <code>COMBINED</code> (both of the dictionaries above, combined).
+ * The name is matched case-insensitively; {@code MORFOLOGIK} is the default.
+ *
+ * @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
+ */
+public class MorfologikFilterFactory extends BaseTokenFilterFactory {
+  /** Schema attribute naming the dictionary to use. */
+  public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
+
+  /** Dictionary handed to every created filter; replaced by {@link #init(Map)} when configured. */
+  private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK;
+
+  /** Wraps {@code ts} in a {@link MorfologikFilter} using the configured dictionary. */
+  @Override
+  public TokenStream create(TokenStream ts) {
+    return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
+  }
+
+  /**
+   * Reads the optional {@code dictionary} attribute; a missing or empty value keeps the default.
+   *
+   * @throws IllegalArgumentException if the value does not name a {@link DICTIONARY} constant
+   */
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    String dictionaryName = args.get(DICTIONARY_SCHEMA_ATTRIBUTE);
+    if (dictionaryName != null && !dictionaryName.isEmpty()) {
+      try {
+        // Enum.valueOf never returns null, so the result can be assigned directly.
+        dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ENGLISH));
+      } catch (IllegalArgumentException e) {
+        throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the "
+            + "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: "
+            + dictionaryName, e);
+      }
+    }
+  }
+}
Added: lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestMorfologikFilterFactory.java?rev=1306198&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestMorfologikFilterFactory.java (added)
+++ lucene/dev/trunk/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestMorfologikFilterFactory.java Wed Mar 28 07:33:55 2012
@@ -0,0 +1,45 @@
+package org.apache.solr.analysis;
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.solr.schema.IndexSchema;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test for {@link MorfologikFilterFactory}.
+ */
+public class TestMorfologikFilterFactory extends BaseTokenTestCase {
+ public void testCreateDictionary() throws Exception {
+ StringReader reader = new StringReader("rowery bilety");
+ Map<String,String> initParams = new HashMap<String,String>();
+ initParams.put(IndexSchema.LUCENE_MATCH_VERSION_PARAM,
+ DEFAULT_VERSION.toString());
+ initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
+ "morfologik");
+ MorfologikFilterFactory factory = new MorfologikFilterFactory();
+ factory.init(initParams);
+ TokenStream ts = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION,
+ reader));
+ assertTokenStreamContents(ts, new String[] {"rower", "bilet"});
+ }
+}