You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/24 17:21:53 UTC

svn commit: r1304845 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/ lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/ext/ lucene/contrib/analyzers/co...

Author: rmuir
Date: Sat Mar 24 16:21:52 2012
New Revision: 1304845

URL: http://svn.apache.org/viewvc?rev=1304845&view=rev
Log:
LUCENE-3883: Irish Analyzer

Added:
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/
      - copied from r1304836, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java
      - copied unchanged from r1304836, lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ga/
      - copied from r1304836, lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/
      - copied from r1304836, lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java
      - copied, changed from r1304836, lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java
      - copied unchanged from r1304836, lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java
    lucene/dev/branches/branch_3x/solr/example/solr/conf/lang/contractions_ga.txt
      - copied unchanged from r1304836, lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt
    lucene/dev/branches/branch_3x/solr/example/solr/conf/lang/hyphenations_ga.txt
      - copied unchanged from r1304836, lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt
    lucene/dev/branches/branch_3x/solr/example/solr/conf/lang/stopwords_ga.txt
      - copied unchanged from r1304836, lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/build.xml
    lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1304845&r1=1304844&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Sat Mar 24 16:21:52 2012
@@ -73,6 +73,8 @@ New Features
  * LUCENE-3714: Add WFSTCompletionLookup suggester that supports more fine-grained
    ranking for suggestions.  (Mike McCandless, Dawid Weiss, Robert Muir)
 
+ * LUCENE-3883: Add Analyzer for Irish. (Jim Regan via Robert Muir)
+
 API Changes
 
  * LUCENE-3596: DirectoryTaxonomyWriter.openIndexWriter() now takes an 

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java?rev=1304845&r1=1304836&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java Sat Mar 24 16:21:52 2012
@@ -22,16 +22,16 @@ import java.io.Reader;
 import java.util.Arrays;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.fr.ElisionFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.KeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.IrishStemmer;
 
@@ -122,11 +122,11 @@ public final class IrishAnalyzer extends
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    * 
    * @return A
-   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link IrishLowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java?rev=1304845&r1=1304836&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java Sat Mar 24 16:21:52 2012
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 

Modified: lucene/dev/branches/branch_3x/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/build.xml?rev=1304845&r1=1304844&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/build.xml Sat Mar 24 16:21:52 2012
@@ -677,6 +677,9 @@
   	<!-- french -->
     <copy verbose="true" file="${analysis-common.res.dir}/snowball/french_stop.txt"
                          tofile="${analysis.conf.dest}/stopwords_fr.txt"/>
+        <!-- irish -->
+    <copy verbose="true" file="${analysis-common.res.dir}/ga/stopwords.txt"
+                         tofile="${analysis.conf.dest}/stopwords_ga.txt"/>
   	<!-- galician -->
     <copy verbose="true" file="${analysis-common.res.dir}/gl/stopwords.txt"
                          tofile="${analysis.conf.dest}/stopwords_gl.txt"/>

Copied: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java (from r1304836, lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java?p2=lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java&p1=lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java&r1=1304836&r2=1304845&rev=1304845&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java Sat Mar 24 16:21:52 2012
@@ -31,10 +31,16 @@ import org.apache.lucene.analysis.ga.Iri
  * &lt;/fieldType&gt;</pre>
  *
  */
-public class IrishLowerCaseFilterFactory extends BaseTokenFilterFactory {
+public class IrishLowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
 
-  @Override
+  //@Override
   public TokenStream create(TokenStream input) {
     return new IrishLowerCaseFilter(input);
   }
+
+  // Reusing this filter for multi-term queries will 'mostly work', except for special cases, just like most other filters.
+  //@Override
+  public Object getMultiTermComponent() {
+    return this;
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml?rev=1304845&r1=1304844&r2=1304845&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml Sat Mar 24 16:21:52 2012
@@ -633,6 +633,20 @@
       </analyzer>
     </fieldType>
     
+    <!-- Irish -->
+    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- removes d', etc -->
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
+        <!-- removes n-, etc. enablePositionIncrements is intentionally set to false! -->
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
+        <filter class="solr.IrishLowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
+      </analyzer>
+    </fieldType>
+    
     <!-- Galician -->
     <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
       <analyzer>