You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/24 17:21:53 UTC
svn commit: r1304845 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/
lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/ext/
lucene/contrib/analyzers/co...
Author: rmuir
Date: Sat Mar 24 16:21:52 2012
New Revision: 1304845
URL: http://svn.apache.org/viewvc?rev=1304845&view=rev
Log:
LUCENE-3883: Irish Analyzer
Added:
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/
- copied from r1304836, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java
- copied unchanged from r1304836, lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ga/
- copied from r1304836, lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/
- copied from r1304836, lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java
- copied, changed from r1304836, lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java
- copied unchanged from r1304836, lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java
lucene/dev/branches/branch_3x/solr/example/solr/conf/lang/contractions_ga.txt
- copied unchanged from r1304836, lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt
lucene/dev/branches/branch_3x/solr/example/solr/conf/lang/hyphenations_ga.txt
- copied unchanged from r1304836, lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt
lucene/dev/branches/branch_3x/solr/example/solr/conf/lang/stopwords_ga.txt
- copied unchanged from r1304836, lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/build.xml
lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml
Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1304845&r1=1304844&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Sat Mar 24 16:21:52 2012
@@ -73,6 +73,8 @@ New Features
* LUCENE-3714: Add WFSTCompletionLookup suggester that supports more fine-grained
ranking for suggestions. (Mike McCandless, Dawid Weiss, Robert Muir)
+ * LUCENE-3883: Add Analyzer for Irish. (Jim Regan via Robert Muir)
+
API Changes
* LUCENE-3596: DirectoryTaxonomyWriter.openIndexWriter() now takes an
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java?rev=1304845&r1=1304836&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java Sat Mar 24 16:21:52 2012
@@ -22,16 +22,16 @@ import java.io.Reader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.fr.ElisionFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.IrishStemmer;
@@ -122,11 +122,11 @@ public final class IrishAnalyzer extends
/**
* Creates a
- * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link IrishLowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java?rev=1304845&r1=1304836&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java Sat Mar 24 16:21:52 2012
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
Modified: lucene/dev/branches/branch_3x/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/build.xml?rev=1304845&r1=1304844&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/build.xml Sat Mar 24 16:21:52 2012
@@ -677,6 +677,9 @@
<!-- french -->
<copy verbose="true" file="${analysis-common.res.dir}/snowball/french_stop.txt"
tofile="${analysis.conf.dest}/stopwords_fr.txt"/>
+ <!-- irish -->
+ <copy verbose="true" file="${analysis-common.res.dir}/ga/stopwords.txt"
+ tofile="${analysis.conf.dest}/stopwords_ga.txt"/>
<!-- galician -->
<copy verbose="true" file="${analysis-common.res.dir}/gl/stopwords.txt"
tofile="${analysis.conf.dest}/stopwords_gl.txt"/>
Copied: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java (from r1304836, lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java?p2=lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java&p1=lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java&r1=1304836&r2=1304845&rev=1304845&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java Sat Mar 24 16:21:52 2012
@@ -31,10 +31,16 @@ import org.apache.lucene.analysis.ga.Iri
* </fieldType></pre>
*
*/
-public class IrishLowerCaseFilterFactory extends BaseTokenFilterFactory {
+public class IrishLowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
- @Override
+ //@Override
public TokenStream create(TokenStream input) {
return new IrishLowerCaseFilter(input);
}
+
+ // this will 'mostly work', except for special cases, just like most other filters
+ //@Override
+ public Object getMultiTermComponent() {
+ return this;
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml?rev=1304845&r1=1304844&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml Sat Mar 24 16:21:52 2012
@@ -633,6 +633,20 @@
</analyzer>
</fieldType>
+ <!-- Irish -->
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- removes d', etc -->
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
+ <!-- removes n-, etc. position increments is intentionally false! -->
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
+ </analyzer>
+ </fieldType>
+
<!-- Galician -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer>