You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/24 16:59:05 UTC
svn commit: r1304836 - in /lucene/dev/trunk: lucene/contrib/ modules/analysis/common/src/java/org/apache/lucene/analysis/ga/ modules/analysis/common/src/java/org/tartarus/snowball/ext/ modules/analysis/common/src/resources/org/apache/lucene/analysis/ga...

Author: rmuir
Date: Sat Mar 24 15:59:04 2012
New Revision: 1304836

URL: http://svn.apache.org/viewvc?rev=1304836&view=rev
Log:
LUCENE-3883: Irish Analyzer

Added:
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java   (with props)
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java   (with props)
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/irish.sbl.txt
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html   (with props)
    lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java   (with props)
    lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/
    lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt   (with props)
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java   (with props)
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java   (with props)
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java   (with props)
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java   (with props)
    lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt   (with props)
    lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt   (with props)
    lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt   (with props)
Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/solr/build.xml
    lucene/dev/trunk/solr/example/solr/conf/schema.xml

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1304836&r1=1304835&r2=1304836&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Sat Mar 24 15:59:04 2012
@@ -189,6 +189,8 @@ New Features
  * LUCENE-3714: Add WFSTCompletionLookup suggester that supports more fine-grained
    ranking for suggestions.  (Mike McCandless, Dawid Weiss, Robert Muir)
 
+ * LUCENE-3883: Add Analyzer for Irish. (Jim Regan via Robert Muir)
+
 API Changes
 
  * LUCENE-3596: DirectoryTaxonomyWriter.openIndexWriter() now takes an

Added: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,151 @@
+package org.apache.lucene.analysis.ga;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.fr.ElisionFilter;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.IrishStemmer;
+
+/**
+ * {@link Analyzer} for Irish.
+ * <p>
+ * Tokens produced by {@link StandardTokenizer} are stripped of stand-alone
+ * mutation prefixes and elided particles, lower-cased with
+ * {@link IrishLowerCaseFilter}, filtered against a stopword set and finally
+ * stemmed with the Snowball {@link IrishStemmer}.
+ */
+public final class IrishAnalyzer extends StopwordAnalyzerBase {
+  /** Terms that must not be stemmed; an unmodifiable copy of the set given at construction. */
+  private final CharArraySet stemExclusionSet;
+
+  /** File containing default Irish stopwords. */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+  /**
+   * Prefixes handed to the {@link ElisionFilter} in
+   * {@link #createComponents(String, Reader)}; matched ignoring case.
+   */
+  private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
+      new CharArraySet(Version.LUCENE_CURRENT,
+          Arrays.asList(
+              "d", "m", "b"
+          ), true));
+
+  /**
+   * When StandardTokenizer splits t-athair into {t, athair}, we don't
+   * want to cause a position increment, otherwise there will be problems
+   * with phrase queries versus tAthair (which would not have a gap).
+   */
+  private static final CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet(
+      new CharArraySet(Version.LUCENE_CURRENT,
+          Arrays.asList(
+              "h", "n", "t"
+          ), true));
+
+  /**
+   * Returns an unmodifiable instance of the default stop words set.
+   * @return default stop words set.
+   */
+  public static CharArraySet getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+
+  /**
+   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+   * accesses the static final set the first time.
+   */
+  private static class DefaultSetHolder {
+    static final CharArraySet DEFAULT_STOP_SET;
+
+    static {
+      try {
+        DEFAULT_STOP_SET = loadStopwordSet(false,
+            IrishAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR); keep the original exception as the cause so
+        // the real reason for the failure is not lost
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
+    }
+  }
+
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   *
+   * @param matchVersion lucene compatibility version
+   */
+  public IrishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words and an empty stem
+   * exclusion set.
+   *
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   */
+  public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * stemming.
+   *
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   * @param stemExclusionSet a set of terms not to be stemmed
+   */
+  public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(matchVersion, stopwords);
+    // defensive copy: later changes to the caller's set must not affect this analyzer
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
+  }
+
+  /**
+   * Creates a
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
+   *
+   * @return A
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   *         built from a {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, a {@link StopFilter} (position increments
+   *         disabled) that drops the stand-alone prefixes h, n and t,
+   *         {@link ElisionFilter}, {@link IrishLowerCaseFilter}, a
+   *         {@link StopFilter} using the configured stopwords,
+   *         {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         provided, and {@link SnowballFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    // remove split-off mutation prefixes without leaving a position gap
+    StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
+    s.setEnablePositionIncrements(false);
+    result = s;
+    result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+    result = new IrishLowerCaseFilter(result);
+    result = new StopFilter(matchVersion, result, stopwords);
+    if (!stemExclusionSet.isEmpty()) {
+      result = new KeywordMarkerFilter(result, stemExclusionSet);
+    }
+    result = new SnowballFilter(result, new IrishStemmer());
+    return new TokenStreamComponents(source, result);
+  }
+}

Added: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,85 @@
+package org.apache.lucene.analysis.ga;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * Lower-cases Irish token text. A token that starts with a lower-case
+ * 'n' or 't' immediately followed by an upper-case vowel (n-eclipsis,
+ * t-prothesis) additionally gets a hyphen inserted after that first
+ * letter, so that 'nAthair' becomes 'n-athair'.
+ */
+public final class IrishLowerCaseFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /**
+   * Wraps the given stream so that its tokens are normalised as
+   * described on the class.
+   */
+  public IrishLowerCaseFilter(TokenStream in) {
+    super(in);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+
+    char[] buffer = termAtt.buffer();
+    int length = termAtt.length();
+    int start = 0;
+
+    final boolean prefixed = length > 1
+        && (buffer[0] == 'n' || buffer[0] == 't')
+        && isUpperVowel(buffer[1]);
+    if (prefixed) {
+      // make room for one more char, then shift everything after the
+      // prefix letter one slot to the right and drop the hyphen in
+      buffer = termAtt.resizeBuffer(length + 1);
+      System.arraycopy(buffer, 1, buffer, 2, length - 1);
+      buffer[1] = '-';
+      length++;
+      termAtt.setLength(length);
+      // the prefix letter is already lower case and '-' has no case
+      start = 2;
+    }
+
+    for (int pos = start; pos < length; pos++) {
+      buffer[pos] = Character.toLowerCase(buffer[pos]);
+    }
+    return true;
+  }
+
+  /**
+   * Tells whether {@code v} is one of A, E, I, O, U or their
+   * acute-accented (fada) upper-case forms.
+   */
+  private boolean isUpperVowel (int v) {
+    return "AEIOU\u00c1\u00c9\u00cd\u00d3\u00da".indexOf(v) >= 0;
+  }
+}

Added: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/irish.sbl.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/irish.sbl.txt?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/irish.sbl.txt (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/irish.sbl.txt Sat Mar 24 15:59:04 2012
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+ // These are the Snowball rules from LUCENE-3883, kept for reference and for
+ // code regeneration. This file can be removed once the stemmer is added to Snowball.
+ 
+routines (
+  R1 R2 RV
+  initial_morph
+  mark_regions
+  noun_sfx
+  deriv
+  verb_sfx
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* Latin 1 */
+
+stringdef a'   hex 'E1'  // a-acute
+stringdef e'   hex 'E9'  // e-acute
+stringdef i'   hex 'ED'  // i-acute
+stringdef o'   hex 'F3'  // o-acute
+stringdef u'   hex 'FA'  // u-acute
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}'
+
+// mark_regions: sets the three markers pV, p1 and p2 that the RV, R1 and R2
+// tests compare against the cursor. Each marker defaults to `limit` and keeps
+// that value when the corresponding pattern is not found in the word.
+define mark_regions as (
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    // pV: position just past the first vowel
+    do (
+        gopast v setmark pV
+    )
+    // p1: position just past the first non-vowel that follows a vowel;
+    // p2: the same pattern applied once more, starting from p1
+    do (
+        gopast v gopast non-v setmark p1
+        gopast v gopast non-v setmark p2
+    )
+)
+
+// initial_morph: normalises the start of the word. The matched prefix is
+// either deleted (h-, n-, t-, d', m', b') or replaced by a single consonant
+// (d'fh -> f, sh -> s, mb -> b, ... th -> t), as listed in the table below.
+define initial_morph as (
+  [substring] among (
+    'h-' 'n-' 't-' //nAthair -> n-athair, but alone are problematic
+    (delete)
+
+    // verbs
+    'd{'}' 
+    (delete)
+    'd{'}fh' 
+    (<- 'f')
+    // other contractions
+    'm{'}' 'b{'}'
+    (delete)
+
+    'sh'
+    (<- 's')
+
+    'mb'
+    (<- 'b')
+    'gc'
+    (<- 'c')
+    'nd'
+    (<- 'd')
+    'bhf'
+    (<- 'f')
+    'ng'
+    (<- 'g')
+    'bp'
+    (<- 'p')
+    'ts'
+    (<- 's')
+    'dt'
+    (<- 't')
+
+    // Lenition
+    'bh'
+    (<- 'b')
+    'ch'
+    (<- 'c')
+    'dh'
+    (<- 'd')
+    'fh'
+    (<- 'f')
+    'gh'
+    (<- 'g')
+    'mh'
+    (<- 'm')
+    'ph'
+    (<- 'p')
+    'th'
+    (<- 't')
+  )
+)
+
+// Suffix routines; everything in this block is defined in backward mode.
+backwardmode (
+
+  // Region tests: succeed when the cursor is at or after the marker
+  // recorded by mark_regions.
+  define RV as $pV <= cursor
+  define R1 as $p1 <= cursor
+  define R2 as $p2 <= cursor
+
+  // noun_sfx: deletes the matched noun ending when it lies in R1
+  // (first group) or in R2 (second group).
+  define noun_sfx as (
+    [substring] among (
+      'amh' 'eamh' 'abh' 'eabh'
+      'aibh' 'ibh' 'aimh' 'imh'
+      'a{i'}ocht' '{i'}ocht' 'a{i'}ochta' '{i'}ochta'
+      (R1 delete)
+      'ire' 'ir{i'}' 'aire' 'air{i'}'
+      (R2 delete)
+    )
+  )
+  // deriv: deletes the first group of endings when in R2; the remaining
+  // groups are replaced unconditionally by the stem shown next to them.
+  define deriv as (
+    [substring] among (
+      'acht' 'eacht' 'ach' 'each' 'eacht{u'}il' 'eachta' 'acht{u'}il' 'achta'
+      (R2 delete)  //siopadóireacht -> siopadóir but not poblacht -> pobl
+      'arcacht' 'arcachta{i'}' 'arcachta'
+      (<- 'arc') // monarcacht -> monarc
+      'gineach' 'gineas' 'ginis'
+      (<- 'gin')
+      'grafa{i'}och' 'grafa{i'}ocht' 'grafa{i'}ochta' 'grafa{i'}ochta{i'}'
+      (<- 'graf')
+      'paite' 'patach' 'pataigh' 'patacha'
+      (<- 'paite')
+      '{o'}ideach' '{o'}ideacha' '{o'}idigh'
+      (<- '{o'}id')
+    )
+  )
+  // verb_sfx: deletes the matched verb ending when it lies in RV
+  // (first group) or in R1 (second group).
+  define verb_sfx as (
+    [substring] among (
+      'imid' 'aimid' '{i'}mid' 'a{i'}mid' 
+      'faidh' 'fidh'
+      (RV delete)
+      'ain'
+      'eadh' 'adh' 
+      '{a'}il'
+      'tear' 'tar'
+      (R1 delete)
+    )
+  )
+)
+
+// stem: the exported entry point (see `externals`). Normalises the start of
+// the word, marks the regions, then runs the three suffix routines in
+// backward mode. Every step is wrapped in `do`, so a step that does not
+// match does not prevent the following steps from running.
+define stem as (
+  do initial_morph
+  do mark_regions
+  backwards (
+    do noun_sfx
+    do deriv
+    do verb_sfx
+  )
+)

Added: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ga/package.html Sat Mar 24 15:59:04 2012
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analysis for Irish.
+</body>
+</html>

Added: lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,587 @@
+// This file was generated automatically by the Snowball to Java compiler
+
+package org.tartarus.snowball.ext;
+
+import org.tartarus.snowball.Among;
+import org.tartarus.snowball.SnowballProgram;
+
+ /**
+  * This class was automatically generated by a Snowball to Java compiler 
+  * It implements the stemming algorithm defined by a snowball script.
+  */
+
+public class IrishStemmer extends SnowballProgram {
+
+private static final long serialVersionUID = 1L;
+
+        private final static IrishStemmer methodObject = new IrishStemmer ();
+
+                private final static Among a_0[] = {
+                    new Among ( "b'", -1, 4, "", methodObject ),
+                    new Among ( "bh", -1, 14, "", methodObject ),
+                    new Among ( "bhf", 1, 9, "", methodObject ),
+                    new Among ( "bp", -1, 11, "", methodObject ),
+                    new Among ( "ch", -1, 15, "", methodObject ),
+                    new Among ( "d'", -1, 2, "", methodObject ),
+                    new Among ( "d'fh", 5, 3, "", methodObject ),
+                    new Among ( "dh", -1, 16, "", methodObject ),
+                    new Among ( "dt", -1, 13, "", methodObject ),
+                    new Among ( "fh", -1, 17, "", methodObject ),
+                    new Among ( "gc", -1, 7, "", methodObject ),
+                    new Among ( "gh", -1, 18, "", methodObject ),
+                    new Among ( "h-", -1, 1, "", methodObject ),
+                    new Among ( "m'", -1, 4, "", methodObject ),
+                    new Among ( "mb", -1, 6, "", methodObject ),
+                    new Among ( "mh", -1, 19, "", methodObject ),
+                    new Among ( "n-", -1, 1, "", methodObject ),
+                    new Among ( "nd", -1, 8, "", methodObject ),
+                    new Among ( "ng", -1, 10, "", methodObject ),
+                    new Among ( "ph", -1, 20, "", methodObject ),
+                    new Among ( "sh", -1, 5, "", methodObject ),
+                    new Among ( "t-", -1, 1, "", methodObject ),
+                    new Among ( "th", -1, 21, "", methodObject ),
+                    new Among ( "ts", -1, 12, "", methodObject )
+                };
+
+                private final static Among a_1[] = {
+                    new Among ( "\u00EDochta", -1, 1, "", methodObject ),
+                    new Among ( "a\u00EDochta", 0, 1, "", methodObject ),
+                    new Among ( "ire", -1, 2, "", methodObject ),
+                    new Among ( "aire", 2, 2, "", methodObject ),
+                    new Among ( "abh", -1, 1, "", methodObject ),
+                    new Among ( "eabh", 4, 1, "", methodObject ),
+                    new Among ( "ibh", -1, 1, "", methodObject ),
+                    new Among ( "aibh", 6, 1, "", methodObject ),
+                    new Among ( "amh", -1, 1, "", methodObject ),
+                    new Among ( "eamh", 8, 1, "", methodObject ),
+                    new Among ( "imh", -1, 1, "", methodObject ),
+                    new Among ( "aimh", 10, 1, "", methodObject ),
+                    new Among ( "\u00EDocht", -1, 1, "", methodObject ),
+                    new Among ( "a\u00EDocht", 12, 1, "", methodObject ),
+                    new Among ( "ir\u00ED", -1, 2, "", methodObject ),
+                    new Among ( "air\u00ED", 14, 2, "", methodObject )
+                };
+
+                private final static Among a_2[] = {
+                    new Among ( "\u00F3ideacha", -1, 6, "", methodObject ),
+                    new Among ( "patacha", -1, 5, "", methodObject ),
+                    new Among ( "achta", -1, 1, "", methodObject ),
+                    new Among ( "arcachta", 2, 2, "", methodObject ),
+                    new Among ( "eachta", 2, 1, "", methodObject ),
+                    new Among ( "grafa\u00EDochta", -1, 4, "", methodObject ),
+                    new Among ( "paite", -1, 5, "", methodObject ),
+                    new Among ( "ach", -1, 1, "", methodObject ),
+                    new Among ( "each", 7, 1, "", methodObject ),
+                    new Among ( "\u00F3ideach", 8, 6, "", methodObject ),
+                    new Among ( "gineach", 8, 3, "", methodObject ),
+                    new Among ( "patach", 7, 5, "", methodObject ),
+                    new Among ( "grafa\u00EDoch", -1, 4, "", methodObject ),
+                    new Among ( "pataigh", -1, 5, "", methodObject ),
+                    new Among ( "\u00F3idigh", -1, 6, "", methodObject ),
+                    new Among ( "acht\u00FAil", -1, 1, "", methodObject ),
+                    new Among ( "eacht\u00FAil", 15, 1, "", methodObject ),
+                    new Among ( "gineas", -1, 3, "", methodObject ),
+                    new Among ( "ginis", -1, 3, "", methodObject ),
+                    new Among ( "acht", -1, 1, "", methodObject ),
+                    new Among ( "arcacht", 19, 2, "", methodObject ),
+                    new Among ( "eacht", 19, 1, "", methodObject ),
+                    new Among ( "grafa\u00EDocht", -1, 4, "", methodObject ),
+                    new Among ( "arcachta\u00ED", -1, 2, "", methodObject ),
+                    new Among ( "grafa\u00EDochta\u00ED", -1, 4, "", methodObject )
+                };
+
+                private final static Among a_3[] = {
+                    new Among ( "imid", -1, 1, "", methodObject ),
+                    new Among ( "aimid", 0, 1, "", methodObject ),
+                    new Among ( "\u00EDmid", -1, 1, "", methodObject ),
+                    new Among ( "a\u00EDmid", 2, 1, "", methodObject ),
+                    new Among ( "adh", -1, 2, "", methodObject ),
+                    new Among ( "eadh", 4, 2, "", methodObject ),
+                    new Among ( "faidh", -1, 1, "", methodObject ),
+                    new Among ( "fidh", -1, 1, "", methodObject ),
+                    new Among ( "\u00E1il", -1, 2, "", methodObject ),
+                    new Among ( "ain", -1, 2, "", methodObject ),
+                    new Among ( "tear", -1, 2, "", methodObject ),
+                    new Among ( "tar", -1, 2, "", methodObject )
+                };
+
+                private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 };
+
+        private int I_p2;
+        private int I_p1;
+        private int I_pV;
+
+                /**
+                 * Copies the stemmer state of {@code other} into this
+                 * instance: the superclass copy plus the three region
+                 * markers pV, p1 and p2.
+                 */
+                private void copy_from(IrishStemmer other) {
+                    super.copy_from(other);
+                    this.I_pV = other.I_pV;
+                    this.I_p1 = other.I_p1;
+                    this.I_p2 = other.I_p2;
+                }
+
+                /**
+                 * Generated translation of the Snowball routine
+                 * {@code mark_regions}: initialises I_pV, I_p1 and I_p2 to
+                 * {@code limit}, then tries to set them by scanning with the
+                 * vowel grouping {@code g_v}. The cursor is restored after
+                 * each of the two {@code do} blocks.
+                 *
+                 * @return always {@code true}
+                 */
+                private boolean r_mark_regions() {
+            int v_1;
+            int v_3;
+                    // (, line 28
+                    I_pV = limit;
+                    I_p1 = limit;
+                    I_p2 = limit;
+                    // do, line 34
+                    v_1 = cursor;
+                    lab0: do {
+                        // (, line 34
+                        // gopast, line 35
+                        golab1: while(true)
+                        {
+                            lab2: do {
+                                if (!(in_grouping(g_v, 97, 250)))
+                                {
+                                    break lab2;
+                                }
+                                break golab1;
+                            } while (false);
+                            if (cursor >= limit)
+                            {
+                                // end of input reached: abandon this do block, I_pV keeps its default
+                                break lab0;
+                            }
+                            cursor++;
+                        }
+                        // setmark pV, line 35
+                        I_pV = cursor;
+                    } while (false);
+                    // restore the cursor saved before the first do block
+                    cursor = v_1;
+                    // do, line 37
+                    v_3 = cursor;
+                    lab3: do {
+                        // (, line 37
+                        // gopast, line 38
+                        golab4: while(true)
+                        {
+                            lab5: do {
+                                if (!(in_grouping(g_v, 97, 250)))
+                                {
+                                    break lab5;
+                                }
+                                break golab4;
+                            } while (false);
+                            if (cursor >= limit)
+                            {
+                                break lab3;
+                            }
+                            cursor++;
+                        }
+                        // gopast, line 38
+                        golab6: while(true)
+                        {
+                            lab7: do {
+                                if (!(out_grouping(g_v, 97, 250)))
+                                {
+                                    break lab7;
+                                }
+                                break golab6;
+                            } while (false);
+                            if (cursor >= limit)
+                            {
+                                break lab3;
+                            }
+                            cursor++;
+                        }
+                        // setmark p1, line 38
+                        I_p1 = cursor;
+                        // gopast, line 39
+                        golab8: while(true)
+                        {
+                            lab9: do {
+                                if (!(in_grouping(g_v, 97, 250)))
+                                {
+                                    break lab9;
+                                }
+                                break golab8;
+                            } while (false);
+                            if (cursor >= limit)
+                            {
+                                break lab3;
+                            }
+                            cursor++;
+                        }
+                        // gopast, line 39
+                        golab10: while(true)
+                        {
+                            lab11: do {
+                                if (!(out_grouping(g_v, 97, 250)))
+                                {
+                                    break lab11;
+                                }
+                                break golab10;
+                            } while (false);
+                            if (cursor >= limit)
+                            {
+                                break lab3;
+                            }
+                            cursor++;
+                        }
+                        // setmark p2, line 39
+                        I_p2 = cursor;
+                    } while (false);
+                    // restore the cursor saved before the second do block
+                    cursor = v_3;
+                    return true;
+                }
+
+                private boolean r_initial_morph() {
+            int among_var;
+                    // (, line 43
+                    // [, line 44
+                    bra = cursor;
+                    // substring, line 44
+                    among_var = find_among(a_0, 24);
+                    if (among_var == 0)
+                    {
+                        return false;
+                    }
+                    // ], line 44
+                    ket = cursor;
+                    switch(among_var) {
+                        case 0:
+                            return false;
+                        case 1:
+                            // (, line 46
+                            // delete, line 46
+                            slice_del();
+                            break;
+                        case 2:
+                            // (, line 50
+                            // delete, line 50
+                            slice_del();
+                            break;
+                        case 3:
+                            // (, line 52
+                            // <-, line 52
+                            slice_from("f");
+                            break;
+                        case 4:
+                            // (, line 55
+                            // delete, line 55
+                            slice_del();
+                            break;
+                        case 5:
+                            // (, line 58
+                            // <-, line 58
+                            slice_from("s");
+                            break;
+                        case 6:
+                            // (, line 61
+                            // <-, line 61
+                            slice_from("b");
+                            break;
+                        case 7:
+                            // (, line 63
+                            // <-, line 63
+                            slice_from("c");
+                            break;
+                        case 8:
+                            // (, line 65
+                            // <-, line 65
+                            slice_from("d");
+                            break;
+                        case 9:
+                            // (, line 67
+                            // <-, line 67
+                            slice_from("f");
+                            break;
+                        case 10:
+                            // (, line 69
+                            // <-, line 69
+                            slice_from("g");
+                            break;
+                        case 11:
+                            // (, line 71
+                            // <-, line 71
+                            slice_from("p");
+                            break;
+                        case 12:
+                            // (, line 73
+                            // <-, line 73
+                            slice_from("s");
+                            break;
+                        case 13:
+                            // (, line 75
+                            // <-, line 75
+                            slice_from("t");
+                            break;
+                        case 14:
+                            // (, line 79
+                            // <-, line 79
+                            slice_from("b");
+                            break;
+                        case 15:
+                            // (, line 81
+                            // <-, line 81
+                            slice_from("c");
+                            break;
+                        case 16:
+                            // (, line 83
+                            // <-, line 83
+                            slice_from("d");
+                            break;
+                        case 17:
+                            // (, line 85
+                            // <-, line 85
+                            slice_from("f");
+                            break;
+                        case 18:
+                            // (, line 87
+                            // <-, line 87
+                            slice_from("g");
+                            break;
+                        case 19:
+                            // (, line 89
+                            // <-, line 89
+                            slice_from("m");
+                            break;
+                        case 20:
+                            // (, line 91
+                            // <-, line 91
+                            slice_from("p");
+                            break;
+                        case 21:
+                            // (, line 93
+                            // <-, line 93
+                            slice_from("t");
+                            break;
+                    }
+                    return true;
+                }
+
+                /** Reports whether the cursor is at or past the {@code I_pV} mark. */
+                private boolean r_RV() {
+                    return I_pV <= cursor;
+                }
+
+                /** Reports whether the cursor is at or past the {@code I_p1} mark. */
+                private boolean r_R1() {
+                    return I_p1 <= cursor;
+                }
+
+                /** Reports whether the cursor is at or past the {@code I_p2} mark. */
+                private boolean r_R2() {
+                    return I_p2 <= cursor;
+                }
+
+                private boolean r_noun_sfx() {
+            int among_var;
+                    // (, line 103
+                    // [, line 104
+                    ket = cursor;
+                    // substring, line 104
+                    among_var = find_among_b(a_1, 16);
+                    if (among_var == 0)
+                    {
+                        return false;
+                    }
+                    // ], line 104
+                    bra = cursor;
+                    switch(among_var) {
+                        case 0:
+                            return false;
+                        case 1:
+                            // (, line 108
+                            // call R1, line 108
+                            if (!r_R1())
+                            {
+                                return false;
+                            }
+                            // delete, line 108
+                            slice_del();
+                            break;
+                        case 2:
+                            // (, line 110
+                            // call R2, line 110
+                            if (!r_R2())
+                            {
+                                return false;
+                            }
+                            // delete, line 110
+                            slice_del();
+                            break;
+                    }
+                    return true;
+                }
+
+                private boolean r_deriv() {
+            int among_var;
+                    // (, line 113
+                    // [, line 114
+                    ket = cursor;
+                    // substring, line 114
+                    among_var = find_among_b(a_2, 25);
+                    if (among_var == 0)
+                    {
+                        return false;
+                    }
+                    // ], line 114
+                    bra = cursor;
+                    switch(among_var) {
+                        case 0:
+                            return false;
+                        case 1:
+                            // (, line 116
+                            // call R2, line 116
+                            if (!r_R2())
+                            {
+                                return false;
+                            }
+                            // delete, line 116
+                            slice_del();
+                            break;
+                        case 2:
+                            // (, line 118
+                            // <-, line 118
+                            slice_from("arc");
+                            break;
+                        case 3:
+                            // (, line 120
+                            // <-, line 120
+                            slice_from("gin");
+                            break;
+                        case 4:
+                            // (, line 122
+                            // <-, line 122
+                            slice_from("graf");
+                            break;
+                        case 5:
+                            // (, line 124
+                            // <-, line 124
+                            slice_from("paite");
+                            break;
+                        case 6:
+                            // (, line 126
+                            // <-, line 126
+                            slice_from("\u00F3id");
+                            break;
+                    }
+                    return true;
+                }
+
+                private boolean r_verb_sfx() {
+            int among_var;
+                    // (, line 129
+                    // [, line 130
+                    ket = cursor;
+                    // substring, line 130
+                    among_var = find_among_b(a_3, 12);
+                    if (among_var == 0)
+                    {
+                        return false;
+                    }
+                    // ], line 130
+                    bra = cursor;
+                    switch(among_var) {
+                        case 0:
+                            return false;
+                        case 1:
+                            // (, line 133
+                            // call RV, line 133
+                            if (!r_RV())
+                            {
+                                return false;
+                            }
+                            // delete, line 133
+                            slice_del();
+                            break;
+                        case 2:
+                            // (, line 138
+                            // call R1, line 138
+                            if (!r_R1())
+                            {
+                                return false;
+                            }
+                            // delete, line 138
+                            slice_del();
+                            break;
+                    }
+                    return true;
+                }
+
+                public boolean stem() {
+            int v_1;
+            int v_2;
+            int v_3;
+            int v_4;
+            int v_5;
+                    // (, line 143
+                    // do, line 144
+                    v_1 = cursor;
+                    lab0: do {
+                        // call initial_morph, line 144
+                        if (!r_initial_morph())
+                        {
+                            break lab0;
+                        }
+                    } while (false);
+                    cursor = v_1;
+                    // do, line 145
+                    v_2 = cursor;
+                    lab1: do {
+                        // call mark_regions, line 145
+                        if (!r_mark_regions())
+                        {
+                            break lab1;
+                        }
+                    } while (false);
+                    cursor = v_2;
+                    // backwards, line 146
+                    limit_backward = cursor; cursor = limit;
+                    // (, line 146
+                    // do, line 147
+                    v_3 = limit - cursor;
+                    lab2: do {
+                        // call noun_sfx, line 147
+                        if (!r_noun_sfx())
+                        {
+                            break lab2;
+                        }
+                    } while (false);
+                    cursor = limit - v_3;
+                    // do, line 148
+                    v_4 = limit - cursor;
+                    lab3: do {
+                        // call deriv, line 148
+                        if (!r_deriv())
+                        {
+                            break lab3;
+                        }
+                    } while (false);
+                    cursor = limit - v_4;
+                    // do, line 149
+                    v_5 = limit - cursor;
+                    lab4: do {
+                        // call verb_sfx, line 149
+                        if (!r_verb_sfx())
+                        {
+                            break lab4;
+                        }
+                    } while (false);
+                    cursor = limit - v_5;
+                    cursor = limit_backward;                    return true;
+                }
+
+        /**
+         * Considers any {@code IrishStemmer} equal to any other; no instance state
+         * is compared.
+         *
+         * @param o the object to compare with; {@code null} yields {@code false}
+         * @return {@code true} if {@code o} is an {@code IrishStemmer}
+         */
+        @Override
+        public boolean equals( Object o ) {
+            return o instanceof IrishStemmer;
+        }
+
+        /**
+         * Returns the hash of the class name, so that all instances share one hash code.
+         *
+         * @return the hash code of {@code IrishStemmer.class.getName()}
+         */
+        @Override
+        public int hashCode() {
+            return IrishStemmer.class.getName().hashCode();
+        }
+
+
+
+}
+

Added: lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt (added)
+++ lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt Sat Mar 24 15:59:04 2012
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár

Added: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,71 @@
+package org.apache.lucene.analysis.ga;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+/**
+ * Tests for {@link IrishAnalyzer}: resource loading, stemming, stopwords,
+ * contractions, stem exclusions, hyphenated prefixes and random input.
+ */
+public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new IrishAnalyzer(TEST_VERSION_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "siopadóireacht", "siopadóir");
+    checkOneTermReuse(a, "síceapatacha", "síceapaite");
+    // stopword: "le" must produce no tokens at all
+    assertAnalyzesTo(a, "le", new String[] { });
+  }
+  
+  /** test use of elisionfilter: the b' and m' prefixes are dropped */
+  public void testContractions() throws IOException {
+    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT);
+    assertAnalyzesTo(a, "b'fhearr m'athair",
+        new String[] { "fearr", "athair" });
+  }
+  
+  /** test use of exclusion set: an excluded term passes through unstemmed */
+  public void testExclude() throws IOException {
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("feirmeoireacht"), false);
+    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT, 
+        IrishAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "feirmeoireacht", "feirmeoireacht");
+    // terms outside the exclusion set are still stemmed
+    checkOneTermReuse(a, "siopadóireacht", "siopadóir");
+  }
+  
+  /** test special hyphen handling: the "n-" prefix is removed */
+  public void testHyphens() throws IOException {
+    Analyzer a = new IrishAnalyzer(TEST_VERSION_CURRENT);
+    // NOTE(review): the int[] is presumably the expected position increments - confirm
+    assertAnalyzesTo(a, "n-athair",
+        new String[] { "athair" },
+        new int[] { 1 });
+  }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new IrishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
+}

Added: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishLowerCaseFilter.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.ga;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Checks the terms emitted by {@link IrishLowerCaseFilter}.
+ */
+public class TestIrishLowerCaseFilter extends BaseTokenStreamTestCase {
+  
+  /**
+   * Sends three mixed-case, whitespace-separated tokens through the filter
+   * and compares the resulting terms with the expected ones.
+   */
+  public void testIrishLowerCaseFilter() throws Exception {
+    final StringReader input = new StringReader("nAthair tUISCE hARD");
+    final String[] expectedTerms = { "n-athair", "t-uisce", "hard" };
+    TokenStream tokens = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(new IrishLowerCaseFilter(tokens), expectedTerms);
+  }
+}

Modified: lucene/dev/trunk/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/build.xml?rev=1304836&r1=1304835&r2=1304836&view=diff
==============================================================================
--- lucene/dev/trunk/solr/build.xml (original)
+++ lucene/dev/trunk/solr/build.xml Sat Mar 24 15:59:04 2012
@@ -672,6 +672,9 @@
   	<!-- french -->
     <copy verbose="true" file="${analysis-common.res.dir}/snowball/french_stop.txt"
                          tofile="${analysis.conf.dest}/stopwords_fr.txt"/>
+        <!-- irish -->
+    <copy verbose="true" file="${analysis-common.res.dir}/ga/stopwords.txt"
+                         tofile="${analysis.conf.dest}/stopwords_ga.txt"/>
   	<!-- galician -->
     <copy verbose="true" file="${analysis-common.res.dir}/gl/stopwords.txt"
                          tofile="${analysis.conf.dest}/stopwords_gl.txt"/>

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/IrishLowerCaseFilterFactory.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,40 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
+
+/**
+ * Creates {@link IrishLowerCaseFilter} instances for use in a Solr analyzer chain.
+ * Example configuration:
+ * <pre class="prettyprint" >
+ * &lt;fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.IrishLowerCaseFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class IrishLowerCaseFilterFactory extends BaseTokenFilterFactory {
+
+  /**
+   * Wraps the given stream in an {@link IrishLowerCaseFilter}.
+   *
+   * @param input the token stream to wrap
+   * @return a new {@link IrishLowerCaseFilter} reading from {@code input}
+   */
+  @Override
+  public TokenStream create(TokenStream input) {
+    final TokenStream lowerCased = new IrishLowerCaseFilter(input);
+    return lowerCased;
+  }
+}

Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestIrishLowerCaseFilterFactory.java Sat Mar 24 15:59:04 2012
@@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Sanity check that {@link IrishLowerCaseFilterFactory} produces a working filter.
+ */
+public class TestIrishLowerCaseFilterFactory extends BaseTokenTestCase {
+  /** Runs three mixed-case tokens through a factory-created filter and compares the terms. */
+  public void testCasing() throws Exception {
+    final Reader text = new StringReader("nAthair tUISCE hARD");
+    final MockTokenizer tokenizer = new MockTokenizer(text, MockTokenizer.WHITESPACE, false);
+    final TokenStream filtered = new IrishLowerCaseFilterFactory().create(tokenizer);
+    final String[] expected = { "n-athair", "t-uisce", "hard" };
+    assertTokenStreamContents(filtered, expected);
+  }
+}

Added: lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt (added)
+++ lucene/dev/trunk/solr/example/solr/conf/lang/contractions_ga.txt Sat Mar 24 15:59:04 2012
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b

Added: lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt (added)
+++ lucene/dev/trunk/solr/example/solr/conf/lang/hyphenations_ga.txt Sat Mar 24 15:59:04 2012
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t

Added: lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt?rev=1304836&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt (added)
+++ lucene/dev/trunk/solr/example/solr/conf/lang/stopwords_ga.txt Sat Mar 24 15:59:04 2012
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár

Modified: lucene/dev/trunk/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?rev=1304836&r1=1304835&r2=1304836&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/schema.xml Sat Mar 24 15:59:04 2012
@@ -617,6 +617,20 @@
       </analyzer>
     </fieldType>
     
+    <!-- Irish -->
+    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
+      <analyzer> 
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- removes d', etc -->
+        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
+        <!-- removes n-, etc. Position increments are intentionally disabled here! -->
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
+        <filter class="solr.IrishLowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
+      </analyzer>
+    </fieldType>
+    
     <!-- Galician -->
     <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
       <analyzer>