You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/05/03 14:44:23 UTC

svn commit: r940433 [1/3] - in /lucene/dev/trunk/lucene: ./ contrib/ contrib/analyzers/ contrib/analyzers/stempel/ contrib/analyzers/stempel/src/ contrib/analyzers/stempel/src/java/ contrib/analyzers/stempel/src/java/org/ contrib/analyzers/stempel/src/...

Author: rmuir
Date: Mon May  3 12:44:22 2010
New Revision: 940433

URL: http://svn.apache.org/viewvc?rev=940433&view=rev
Log:
LUCENE-2298: Add stempel, an algorithmic stemmer with included Polish support

Added:
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/build.xml   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/pom.xml.template
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie2.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Optimizer2.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Reduce.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Row.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Trie.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/overview.html   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/apache/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/apache/lucene/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stemmer_20000.tbl   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/apache/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/egothor/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/egothor/stemmer/
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestCompile.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/egothor/stemmer/TestStemmer.java   (with props)
    lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/test/org/egothor/stemmer/testRules.txt   (with props)
Modified:
    lucene/dev/trunk/lucene/NOTICE.txt
    lucene/dev/trunk/lucene/build.xml
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/analyzers/build.xml
    lucene/dev/trunk/lucene/docs/contributions.html
    lucene/dev/trunk/lucene/docs/demo.html
    lucene/dev/trunk/lucene/docs/demo2.html
    lucene/dev/trunk/lucene/docs/demo3.html
    lucene/dev/trunk/lucene/docs/demo4.html
    lucene/dev/trunk/lucene/docs/fileformats.html
    lucene/dev/trunk/lucene/docs/gettingstarted.html
    lucene/dev/trunk/lucene/docs/index.html
    lucene/dev/trunk/lucene/docs/linkmap.html
    lucene/dev/trunk/lucene/docs/linkmap.pdf
    lucene/dev/trunk/lucene/docs/lucene-contrib/index.html
    lucene/dev/trunk/lucene/docs/queryparsersyntax.html
    lucene/dev/trunk/lucene/docs/scoring.html
    lucene/dev/trunk/lucene/docs/systemrequirements.html
    lucene/dev/trunk/lucene/src/site/src/documentation/content/xdocs/site.xml

Modified: lucene/dev/trunk/lucene/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/NOTICE.txt?rev=940433&r1=940432&r2=940433&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/NOTICE.txt (original)
+++ lucene/dev/trunk/lucene/NOTICE.txt Mon May  3 12:44:22 2010
@@ -38,6 +38,15 @@ stopword list that is BSD-licensed creat
 contrib/analyzers/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt.
 See http://members.unine.ch/jacques.savoy/clef/index.html.
 
+The Stempel analyzer (contrib/analyzers) includes BSD-licensed software developed 
+by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
+and Edmond Nolan.
+
+The Polish analyzer (contrib/analyzers) comes with a default
+stopword list that is BSD-licensed created by the Carrot2 project. The file resides
+in contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
+See http://project.carrot2.org/license.html.
+
 Includes lib/servlet-api-2.4.jar from  Apache Tomcat
 
 The SmartChineseAnalyzer source code (under contrib/analyzers) was

Modified: lucene/dev/trunk/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/build.xml?rev=940433&r1=940432&r2=940433&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/build.xml (original)
+++ lucene/dev/trunk/lucene/build.xml Mon May  3 12:44:22 2010
@@ -317,6 +317,7 @@ The source distribution does not contain
 
           <packageset dir="contrib/analyzers/common/src/java"/>
           <packageset dir="contrib/analyzers/smartcn/src/java"/>
+          <packageset dir="contrib/analyzers/stempel/src/java"/>
           <packageset dir="contrib/ant/src/java"/>
           <packageset dir="contrib/benchmark/src/java"/>
           <packageset dir="contrib/icu/src/java"/>
@@ -345,7 +346,7 @@ The source distribution does not contain
   
           <group title="Demo" packages="org.apache.lucene.demo*"/>
   
-          <group title="contrib: Analysis" packages="org.apache.lucene.analysis.*:org.tartarus.snowball*"/>
+          <group title="contrib: Analysis" packages="org.apache.lucene.analysis.*:org.tartarus.snowball*:org.egothor.stemmer*"/>
           <group title="contrib: Ant" packages="org.apache.lucene.ant*"/>
           <group title="contrib: Benchmark" packages="org.apache.lucene.benchmark*"/>
           <group title="contrib: ICU" packages="org.apache.lucene.collation*"/>

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=940433&r1=940432&r2=940433&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Mon May  3 12:44:22 2010
@@ -137,6 +137,9 @@ New features
    sensitive way, either from ICU built-in rules (such as Traditional-Simplified),
    or from rules you write yourself.  (Robert Muir)
 
+ * LUCENE-2298: Add analyzers/stempel, an algorithmic stemmer with support for
+   the Polish language.  (Andrzej Bialecki via Robert Muir)
+
 Build
 
  * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 

Modified: lucene/dev/trunk/lucene/contrib/analyzers/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/build.xml?rev=940433&r1=940432&r2=940433&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/build.xml (original)
+++ lucene/dev/trunk/lucene/contrib/analyzers/build.xml Mon May  3 12:44:22 2010
@@ -23,6 +23,7 @@
     Additional Analyzers
       - common:	Additional Analyzers
       - smartcn:	Smart Analyzer for Simplified Chinese Text
+      - stempel:	Algorithmic Stemmer for Polish
   </description>
 
   <target name="common">
@@ -33,23 +34,31 @@
     <ant dir="smartcn" />
   </target>
 
-  <target name="default" depends="common,smartcn" />
+  <target name="stempel">
+    <ant dir="stempel" />
+  </target>
+
+  <target name="default" depends="common,smartcn,stempel" />
 
   <target name="clean">
     <ant dir="common" target="clean" />
     <ant dir="smartcn" target="clean" />
+    <ant dir="stempel" target="clean" />
   </target>
   <target name="compile-core">
     <ant dir="common" target="compile-core" />
     <ant dir="smartcn" target="compile-core" />
+    <ant dir="stempel" target="compile-core" />
   </target>
   <target name="compile-test">
     <ant dir="common" target="compile-test" />
     <ant dir="smartcn" target="compile-test" />
+    <ant dir="stempel" target="compile-test" />
   </target>
   <target name="test">
     <ant dir="common" target="test" />
     <ant dir="smartcn" target="test" />
+    <ant dir="stempel" target="test" />
   </target>
 
   <target name="build-artifacts-and-tests" depends="default,compile-test" />
@@ -57,16 +66,19 @@
   <target name="dist-maven" depends="default">
     <ant dir="common" target="dist-maven" />
     <ant dir="smartcn" target="dist-maven" />
+    <ant dir="stempel" target="dist-maven" />
   </target>  	
 
   <target name="javadocs">
     <ant dir="common" target="javadocs" />
     <ant dir="smartcn" target="javadocs" />
+    <ant dir="stempel" target="javadocs" />
   </target>  	
 
   <target name="javadocs-index.html">
     <ant dir="common" target="javadocs-index.html" />
     <ant dir="smartcn" target="javadocs-index.html" />
+    <ant dir="stempel" target="javadocs-index.html" />
   </target>
 	
 </project>

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/build.xml?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/build.xml (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/build.xml Mon May  3 12:44:22 2010
@@ -0,0 +1,38 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="stempel" default="default">
+
+  <description>
+    Stempel Analyzer
+  </description>
+	
+  <property name="build.dir" location="../../../build/contrib/analyzers/stempel" />
+  <property name="dist.dir" location="../../../dist/contrib/analyzers/stempel" />
+  <property name="maven.dist.dir" location="../../../dist/maven" />
+
+  <import file="../../contrib-build.xml"/>
+	
+  <path id="test.classpath">
+    <path refid="classpath"/>
+    <pathelement location="../../../build/classes/test/"/>
+    <path refid="junit-path"/>
+    <pathelement location="${build.dir}/classes/java"/>
+  </path>	
+</project>

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/pom.xml.template?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/pom.xml.template (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/pom.xml.template Mon May  3 12:44:22 2010
@@ -0,0 +1,35 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+  -->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-contrib</artifactId>
+    <version>@version@</version>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-stempel</artifactId>
+  <name>Lucene Stempel Analyzer</name>
+  <version>@version@</version>
+  <description>Stempel Analyzer</description>
+  <packaging>jar</packaging>
+</project>

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java Mon May  3 12:44:22 2010
@@ -0,0 +1,154 @@
+package org.apache.lucene.analysis.pl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.stempel.StempelStemmer;
+import org.apache.lucene.analysis.stempel.StempelFilter;
+import org.apache.lucene.util.Version;
+import org.egothor.stemmer.Trie;
+
+/**
+ * {@link Analyzer} for Polish.
+ */
+public final class PolishAnalyzer extends StopwordAnalyzerBase {
+  private final Set<?> stemExclusionSet;
+  private final Trie stemTable;
+  
+  /** File containing default Polish stopwords. */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+  
+  /**
+   * Returns an unmodifiable instance of the default stop words set.
+   * @return default stop words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultsHolder.DEFAULT_STOP_SET;
+  }
+  
+  /**
+   * Atomically loads DEFAULT_STOP_SET and DEFAULT_TABLE in a lazy fashion once the
+   * outer class first accesses either static final field.
+   */
+  private static class DefaultsHolder {
+    static final Set<?> DEFAULT_STOP_SET;
+    static final Trie DEFAULT_TABLE;
+    
+    static {
+      try {
+        DEFAULT_STOP_SET = WordlistLoader.getWordSet(PolishAnalyzer.class, 
+            DEFAULT_STOPWORD_FILE);
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR)
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
+      
+      InputStream stream = PolishAnalyzer.class.getResourceAsStream("stemmer_20000.tbl");
+      try {
+        DataInputStream in = new DataInputStream(new BufferedInputStream(stream));
+        String method = in.readUTF().toUpperCase();
+        if (method.indexOf('M') < 0) {
+          DEFAULT_TABLE = new org.egothor.stemmer.Trie(in);
+        } else {
+          DEFAULT_TABLE = new org.egothor.stemmer.MultiTrie2(in);
+        }
+        in.close();
+      } catch (IOException ex) {
+        // default stemming table should always be present as it is part of the
+        // distribution (JAR)
+        throw new RuntimeException("Unable to load default stemming tables", ex);
+      }
+    }
+  }
+
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   */
+  public PolishAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultsHolder.DEFAULT_STOP_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words.
+   * 
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   */
+  public PolishAnalyzer(Version matchVersion, Set<?> stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * stemming.
+   * 
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   * @param stemExclusionSet a set of terms not to be stemmed
+   */
+  public PolishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+    super(matchVersion, stopwords);
+    this.stemTable = DefaultsHolder.DEFAULT_TABLE;
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
+  }
+
+  /**
+   * Creates a
+   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
+   * 
+   * @return A
+   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         built from a {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
+   *         {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         provided, and {@link StempelFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
+    if(!stemExclusionSet.isEmpty())
+      result = new KeywordMarkerFilter(result, stemExclusionSet);
+    result = new StempelFilter(result, new StempelStemmer(stemTable));
+    return new TokenStreamComponents(source, result);
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html Mon May  3 12:44:22 2010
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Polish.
+</body>
+</html>

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java Mon May  3 12:44:22 2010
@@ -0,0 +1,83 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.analysis.stempel;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * Transforms the token stream as per the stemming algorithm.
+ * <p>
+ * Note: the input to the stemming filter must already be in lower case, so you
+ * will need to apply LowerCaseFilter or LowerCaseTokenizer earlier in the
+ * analysis chain (before this filter) in order for this to work properly!
+ */
+public final class StempelFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+  private final StempelStemmer stemmer;
+  private final int minLength;
+  
+  /**
+   * Length threshold for stemming: only words longer than this many characters
+   * are processed. Words of this length or shorter are returned unchanged.
+   */
+  public static final int DEFAULT_MIN_LENGTH = 3;
+  
+  /**
+   * Create filter using the supplied stemming table.
+   * 
+   * @param in input token stream
+   * @param stemmer stemmer
+   */
+  public StempelFilter(TokenStream in, StempelStemmer stemmer) {
+    this(in, stemmer, DEFAULT_MIN_LENGTH);
+  }
+  
+  /**
+   * Create filter using the supplied stemming table.
+   * 
+   * @param in input token stream
+   * @param stemmer stemmer
+   * @param minLength For performance reasons words of minLength characters
+   * or fewer are not processed, but simply returned.
+   */
+  public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) {
+    super(in);
+    this.stemmer = stemmer;
+    this.minLength = minLength;
+  }
+  
+  /** Returns the next input Token, after being stemmed */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAtt.isKeyword() && termAtt.length() > minLength) {
+        StringBuilder sb = stemmer.stem(termAtt);
+        if (sb != null) // if we can't stem it, return unchanged
+          termAtt.setEmpty().append(sb);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java Mon May  3 12:44:22 2010
@@ -0,0 +1,92 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.lucene.analysis.stempel;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.egothor.stemmer.Diff;
+import org.egothor.stemmer.Trie;
+
+/**
+ * <p>
+ * Stemmer class is a convenient facade for other stemmer-related classes. The
+ * core stemming algorithm and its implementation are taken verbatim from the
+ * Egothor project ( <a href="http://www.egothor.org">www.egothor.org </a>).
+ * </p>
+ * <p>
+ * Even though the stemmer tables supplied in the distribution package are built
+ * for the Polish language, there is nothing language-specific here.
+ * </p>
+ */
+public class StempelStemmer {
+  private Trie stemmer = null;
+  private StringBuilder buffer = new StringBuilder();
+
+  /**
+   * Create a Stemmer using selected stemmer table
+   * 
+   * @param stemmerTable stemmer table.
+   */
+  public StempelStemmer(InputStream stemmerTable) throws IOException {
+    if (stemmerTable == null) return;
+    
+    DataInputStream in = new DataInputStream(new BufferedInputStream(
+        stemmerTable));
+    String method = in.readUTF().toUpperCase();
+    if (method.indexOf('M') < 0) {
+      stemmer = new org.egothor.stemmer.Trie(in);
+    } else {
+      stemmer = new org.egothor.stemmer.MultiTrie2(in);
+    }
+    in.close();
+  }
+
+  /**
+   * Create a Stemmer using pre-loaded stemmer table
+   * 
+   * @param stemmer pre-loaded stemmer table
+   */
+  public StempelStemmer(Trie stemmer) {
+    this.stemmer = stemmer;
+  }
+
+  /**
+   * Stem a word. The returned builder is this stemmer's reused internal buffer.
+   * 
+   * @param word input word to be stemmed.
+   * @return stemmed word, or null if the stem could not be generated.
+   */
+  public StringBuilder stem(CharSequence word) {
+    CharSequence cmd = stemmer.getLastOnPath(word);
+    
+    if (cmd == null)
+        return null;
+    
+    buffer.setLength(0);
+    buffer.append(word);
+
+    Diff.apply(buffer, cmd);
+    
+    if (buffer.length() > 0)
+      return buffer;
+    else
+      return null;
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html Mon May  3 12:44:22 2010
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+	<body>
+		<p>Stempel: Algorithmic Stemmer</p>
+	</body>
+</html>

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/stempel/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java Mon May  3 12:44:22 2010
@@ -0,0 +1,94 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+/**
+ * A Cell is a portion of a trie.
+ */
+class Cell {
+  /** next row id in this way */
+  int ref = -1;
+  /** command of the cell */
+  int cmd = -1;
+  /** how many cmd-s was in subtrie before pack() */
+  int cnt = 0;
+  /** how many chars would be discarded from input key in this way */
+  int skip = 0;
+  
+  /** Constructor for the Cell object. */
+  Cell() {}
+  
+  /**
+   * Construct a Cell using the properties of the given Cell.
+   * 
+   * @param a the Cell whose properties will be used
+   */
+  Cell(Cell a) {
+    ref = a.ref;
+    cmd = a.cmd;
+    cnt = a.cnt;
+    skip = a.skip;
+  }
+  
+  /**
+   * Return a String containing this Cell's attributes.
+   * 
+   * @return a String representation of this Cell
+   */
+  @Override
+  public String toString() {
+    return "ref(" + ref + ")cmd(" + cmd + ")cnt(" + cnt + ")skp(" + skip + ")";
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Cell.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java Mon May  3 12:44:22 2010
@@ -0,0 +1,205 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.util.StringTokenizer;
+
+/**
+ * The Compile class is used to compile a stemmer table.
+ */
+public class Compile {
+  
+  static boolean backward;
+  static boolean multi;
+  static Trie trie;
+  
+  /**
+   * Entry point to the Compile application.
+   * <p>
+   * This program takes any number of arguments: the first is the name of the
+   * desired stemming algorithm to use (a list is available in the package
+   * description) , all of the rest should be the path or paths to a file or
+   * files containing a stemmer table to compile.
+   * 
+   * @param args the command line arguments
+   */
+  public static void main(java.lang.String[] args) {
+    if (args.length < 1) {
+      return;
+    }
+    
+    args[0].toUpperCase();
+    
+    backward = args[0].charAt(0) == '-';
+    int qq = (backward) ? 1 : 0;
+    boolean storeorig = false;
+    
+    if (args[0].charAt(qq) == '0') {
+      storeorig = true;
+      qq++;
+    }
+    
+    multi = args[0].charAt(qq) == 'M';
+    if (multi) {
+      qq++;
+    }
+    
+    String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
+    
+    char optimizer[] = new char[args[0].length() - qq];
+    for (int i = 0; i < optimizer.length; i++) {
+      optimizer[i] = args[0].charAt(qq + i);
+    }
+    
+    for (int i = 1; i < args.length; i++) {
+      LineNumberReader in;
+      // System.out.println("[" + args[i] + "]");
+      Diff diff = new Diff();
+      try {
+        int stems = 0;
+        int words = 0;
+        
+        allocTrie();
+        
+        System.out.println(args[i]);
+        in = new LineNumberReader(new BufferedReader(new InputStreamReader(
+            new FileInputStream(args[i]), charset)));
+        for (String line = in.readLine(); line != null; line = in.readLine()) {
+          try {
+            line = line.toLowerCase();
+            StringTokenizer st = new StringTokenizer(line);
+            String stem = st.nextToken();
+            if (storeorig) {
+              trie.add(stem, "-a");
+              words++;
+            }
+            while (st.hasMoreTokens()) {
+              String token = st.nextToken();
+              if (token.equals(stem) == false) {
+                trie.add(token, diff.exec(token, stem));
+                words++;
+              }
+            }
+          } catch (java.util.NoSuchElementException x) {
+            // no base token (stem) on a line
+          }
+        }
+        
+        Optimizer o = new Optimizer();
+        Optimizer2 o2 = new Optimizer2();
+        Lift l = new Lift(true);
+        Lift e = new Lift(false);
+        Gener g = new Gener();
+        
+        for (int j = 0; j < optimizer.length; j++) {
+          String prefix;
+          switch (optimizer[j]) {
+            case 'G':
+              trie = trie.reduce(g);
+              prefix = "G: ";
+              break;
+            case 'L':
+              trie = trie.reduce(l);
+              prefix = "L: ";
+              break;
+            case 'E':
+              trie = trie.reduce(e);
+              prefix = "E: ";
+              break;
+            case '2':
+              trie = trie.reduce(o2);
+              prefix = "2: ";
+              break;
+            case '1':
+              trie = trie.reduce(o);
+              prefix = "1: ";
+              break;
+            default:
+              continue;
+          }
+          trie.printInfo(prefix + " ");
+        }
+               
+        DataOutputStream os = new DataOutputStream(new BufferedOutputStream(
+            new FileOutputStream(args[i] + ".out")));
+        os.writeUTF(args[0]);
+        trie.store(os);
+        os.close();
+        
+      } catch (FileNotFoundException x) {
+        x.printStackTrace();
+      } catch (IOException x) {
+        x.printStackTrace();
+      }
+    }
+  }
+  
+  static void allocTrie() {
+    if (multi) {
+      trie = new MultiTrie2(!backward);
+    } else {
+      trie = new Trie(!backward);
+    }
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Compile.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java Mon May  3 12:44:22 2010
@@ -0,0 +1,295 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+/**
+ * The Diff object generates a patch string.
+ * <p>
+ * A patch string is actually a command to a stemmer telling it how to reduce a
+ * word to its root. For example, to reduce the word teacher to its root teach
+ * the patch string Db would be generated. This command tells the stemmer to
+ * delete the last 2 characters from the word teacher to reach the stem (the
+ * patch commands are applied starting from the last character of the word).
+ * <p>
+ * A patch string is a sequence of two-character commands: a command letter
+ * followed by a parameter character. For <tt>-</tt> and <tt>D</tt> the
+ * parameter encodes a count (<tt>a</tt> = 1, <tt>b</tt> = 2, ...); for
+ * <tt>R</tt> and <tt>I</tt> it is the character to write.
+ */
+public class Diff {
+  /** allocated dimensions of the work matrices */
+  int sizex = 0;
+  int sizey = 0;
+  /** net[x][y]: cost of turning the first x chars of a into the first y of b */
+  int net[][];
+  /** way[x][y]: the step that produced net[x][y] */
+  int way[][];
+
+  /** costs of the individual edit operations */
+  int INSERT;
+  int DELETE;
+  int REPLACE;
+  int NOOP;
+
+  /**
+   * Constructor for the Diff object; insert, delete and replace cost 1 each,
+   * leaving a character unchanged costs 0.
+   */
+  public Diff() {
+    this(1, 1, 1, 0);
+  }
+
+  /**
+   * Constructor for the Diff object with explicit operation costs.
+   *
+   * @param ins cost of inserting a character
+   * @param del cost of deleting a character
+   * @param rep cost of replacing a character
+   * @param noop cost of keeping a matching character
+   */
+  public Diff(int ins, int del, int rep, int noop) {
+    INSERT = ins;
+    DELETE = del;
+    REPLACE = rep;
+    NOOP = noop;
+  }
+
+  /**
+   * Apply the given patch string <tt>diff</tt> to the given string <tt>
+   * dest</tt>.
+   * <p>
+   * A cursor starts on the last character of <tt>dest</tt> and moves towards
+   * the front. The commands are:
+   * <ul>
+   * <li><tt>-</tt>n: skip n characters;</li>
+   * <li><tt>R</tt>c: replace the character under the cursor with c;</li>
+   * <li><tt>D</tt>n: delete the n characters ending at the cursor;</li>
+   * <li><tt>I</tt>c: insert c right after the cursor.</li>
+   * </ul>
+   * Unknown commands only move the cursor one step. A null patch or an empty
+   * <tt>dest</tt> leaves <tt>dest</tt> untouched. If a command points outside
+   * of <tt>dest</tt>, patching stops silently and the changes made so far are
+   * kept.
+   *
+   * @param dest Destination string
+   * @param diff Patch string
+   */
+  public static void apply(StringBuilder dest, CharSequence diff) {
+    try {
+
+      if (diff == null) {
+        return;
+      }
+
+      int pos = dest.length() - 1;
+      if (pos < 0) {
+        // dest is empty: there is nothing to patch
+        return;
+      }
+      for (int i = 0; i < diff.length() / 2; i++) {
+        char cmd = diff.charAt(2 * i);
+        char param = diff.charAt(2 * i + 1);
+        // count encoded by the parameter: 'a' -> 1, 'b' -> 2, ...
+        int par_num = (param - 'a' + 1);
+        switch (cmd) {
+          case '-':
+            // together with the pos-- below this moves par_num chars left
+            pos = pos - par_num + 1;
+            break;
+          case 'R':
+            dest.setCharAt(pos, param);
+            break;
+          case 'D':
+            int o = pos;
+            pos -= par_num - 1;
+            // delete par_num chars: indices pos..o inclusive
+            dest.delete(pos, o + 1);
+            break;
+          case 'I':
+            // insert behind the cursor; the pos-- below puts the cursor back
+            dest.insert(pos += 1, param);
+            break;
+        }
+        pos--;
+      }
+    } catch (StringIndexOutOfBoundsException x) {
+      // best effort: the patch does not fit this word, keep what was done
+    } catch (ArrayIndexOutOfBoundsException x) {
+      // best effort: the patch does not fit this word, keep what was done
+    }
+  }
+
+  /**
+   * Construct a patch string that transforms a to b.
+   * <p>
+   * The cheapest edit path is computed with the costs given at construction
+   * time. A diagonal step over two different characters is priced at 100 on
+   * top of the previous cost. The path is then read from the ends of both
+   * strings towards their beginnings, which is the order in which
+   * {@link #apply(StringBuilder, CharSequence)} consumes the commands. A run
+   * of unchanged characters at the very front of the word is not emitted,
+   * because no command follows it.
+   * <p>
+   * The method is synchronized because it reuses this instance's work
+   * matrices between calls.
+   *
+   * @param a String 1st string
+   * @param b String 2nd string
+   * @return the patch string, or null if either argument is null
+   */
+  public synchronized String exec(String a, String b) {
+    if (a == null || b == null) {
+      return null;
+    }
+
+    int x;
+    int y;
+    int maxx;
+    int maxy;
+    int go[] = new int[4];
+    // step codes, also used as indices into go[]
+    final int X = 1;
+    final int Y = 2;
+    final int R = 3;
+    final int D = 0;
+
+    /*
+     * setup memory if needed => processing speed up
+     */
+    maxx = a.length() + 1;
+    maxy = b.length() + 1;
+    if ((maxx >= sizex) || (maxy >= sizey)) {
+      sizex = maxx + 8;
+      sizey = maxy + 8;
+      net = new int[sizex][sizey];
+      way = new int[sizex][sizey];
+    }
+
+    /*
+     * clear the network
+     */
+    for (x = 0; x < maxx; x++) {
+      for (y = 0; y < maxy; y++) {
+        net[x][y] = 0;
+      }
+    }
+
+    /*
+     * set known persistent values
+     */
+    for (x = 1; x < maxx; x++) {
+      net[x][0] = x;
+      way[x][0] = X;
+    }
+    for (y = 1; y < maxy; y++) {
+      net[0][y] = y;
+      way[0][y] = Y;
+    }
+
+    for (x = 1; x < maxx; x++) {
+      for (y = 1; y < maxy; y++) {
+        // way on x: delete a char of a
+        go[X] = net[x - 1][y] + DELETE;
+        // way on y: insert a char of b
+        go[Y] = net[x][y - 1] + INSERT;
+        go[R] = net[x - 1][y - 1] + REPLACE;
+        // diagonal costs NOOP when the chars match, 100 otherwise
+        go[D] = net[x - 1][y - 1]
+            + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100);
+        // The comparison order and the mix of >= and > decide ties; keep it.
+        int min = D;
+        if (go[min] >= go[X]) {
+          min = X;
+        }
+        if (go[min] > go[Y]) {
+          min = Y;
+        }
+        if (go[min] > go[R]) {
+          min = R;
+        }
+        way[x][y] = min;
+        // kept as int: narrowing to short would wrap for costs above 32767
+        net[x][y] = go[min];
+      }
+    }
+
+    // read the patch string, walking back from the ends of both strings
+    StringBuilder result = new StringBuilder();
+    // counters are chars starting just below 'a', so one increment gives 'a'
+    final char base = 'a' - 1;
+    char deletes = base;
+    char equals = base;
+    for (x = maxx - 1, y = maxy - 1; x + y != 0;) {
+      switch (way[x][y]) {
+        case X:
+          // delete: flush a pending skip, then extend the delete run
+          if (equals != base) {
+            result.append('-').append(equals);
+            equals = base;
+          }
+          deletes++;
+          x--;
+          break;
+        case Y:
+          // insert: flush pending runs, then emit the inserted char
+          if (deletes != base) {
+            result.append('D').append(deletes);
+            deletes = base;
+          }
+          if (equals != base) {
+            result.append('-').append(equals);
+            equals = base;
+          }
+          result.append('I');
+          result.append(b.charAt(--y));
+          break;
+        case R:
+          // replace: flush pending runs, then emit the replacement char
+          if (deletes != base) {
+            result.append('D').append(deletes);
+            deletes = base;
+          }
+          if (equals != base) {
+            result.append('-').append(equals);
+            equals = base;
+          }
+          result.append('R');
+          result.append(b.charAt(--y));
+          x--;
+          break;
+        case D:
+          // no change: flush a pending delete run, then extend the skip run
+          if (deletes != base) {
+            result.append('D').append(deletes);
+            deletes = base;
+          }
+          equals++;
+          x--;
+          y--;
+          break;
+      }
+    }
+    // a delete run reaching the front of the word still has to be emitted
+    if (deletes != base) {
+      result.append('D').append(deletes);
+      deletes = base;
+    }
+
+    return result.toString();
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Diff.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java Mon May  3 12:44:22 2010
@@ -0,0 +1,121 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.StringTokenizer;
+
+/**
+ * The DiffIt class is a means to generate patch commands from an already
+ * prepared stemmer table.
+ */
+public class DiffIt {
+
+  /**
+   * Reads the single decimal digit at index <tt>i</tt> of <tt>s</tt>.
+   *
+   * @param i index of the digit
+   * @param s string holding the digits
+   * @return the digit's value, or 1 if <tt>s</tt> has no parsable digit at
+   *         that index
+   */
+  static int get(int i, String s) {
+    try {
+      return Integer.parseInt(s.substring(i, i + 1));
+    } catch (RuntimeException x) {
+      // index out of range, not a digit, or s is null: use the default cost
+      return 1;
+    }
+  }
+
+  /**
+   * Entry point to the DiffIt application.
+   * <p>
+   * The first argument is a string of up to four digits giving, in this
+   * order, the costs of insert, delete, replace and no-op; every digit that
+   * is missing or unparsable counts as 1. All remaining arguments are paths
+   * to files containing a stemmer table. The program reads each file and
+   * prints the patch commands for the stems to standard output. Without any
+   * argument the program does nothing.
+   *
+   * @param args the cost digits followed by the paths to stemmer table files
+   */
+  public static void main(java.lang.String[] args) {
+    if (args.length < 1) {
+      return;
+    }
+
+    int ins = get(0, args[0]);
+    int del = get(1, args[0]);
+    int rep = get(2, args[0]);
+    int nop = get(3, args[0]);
+
+    for (int i = 1; i < args.length; i++) {
+      Diff diff = new Diff(ins, del, rep, nop);
+      LineNumberReader in = null;
+      try {
+        in = new LineNumberReader(new BufferedReader(new FileReader(args[i])));
+        for (String line = in.readLine(); line != null; line = in.readLine()) {
+          line = line.toLowerCase();
+          StringTokenizer st = new StringTokenizer(line);
+          if (!st.hasMoreTokens()) {
+            // no base token (stem) on a line
+            continue;
+          }
+          String stem = st.nextToken();
+          System.out.println(stem + " -a");
+          while (st.hasMoreTokens()) {
+            String token = st.nextToken();
+            if (!token.equals(stem)) {
+              System.out.println(stem + " " + diff.exec(token, stem));
+            }
+          }
+        }
+
+      } catch (IOException x) {
+        x.printStackTrace();
+      } finally {
+        // release the file whether or not reading succeeded
+        if (in != null) {
+          try {
+            in.close();
+          } catch (IOException x) {
+            x.printStackTrace();
+          }
+        }
+      }
+    }
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/DiffIt.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java Mon May  3 12:44:22 2010
@@ -0,0 +1,132 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The Gener object helps in the discarding of nodes which break the reduction
+ * effort and defend the structure against large reductions.
+ */
+public class Gener extends Reduce {
+  /**
+   * Constructor for the Gener object.
+   */
+  public Gener() {}
+  
+  /**
+   * Return a Trie with infrequent values occurring in the given Trie removed.
+   * 
+   * @param orig the Trie to optimize
+   * @return a new optimized Trie
+   */
+  @Override
+  public Trie optimize(Trie orig) {
+    List<CharSequence> cmds = orig.cmds;
+    List<Row> orows = orig.rows;
+    int remap[] = new int[orows.size()];
+    
+    // Visit rows last to first; remap[j] == 0 flags a row that eat() rejected.
+    Arrays.fill(remap, 1);
+    for (int j = orows.size() - 1; j >= 0; j--) {
+      if (eat(orows.get(j), remap)) {
+        remap[j] = 0;
+      }
+    }
+    
+    Arrays.fill(remap, -1);
+    List<Row> rows = removeGaps(orig.root, orows, new ArrayList<Row>(), remap);
+    
+    return new Trie(orig.forward, remap[orig.root], cmds, rows);
+  }
+  
+  /**
+   * Prune the given Row and test whether it should be dropped from the
+   * optimized Trie.
+   * <p>
+   * References to rows already flagged <tt>0</tt> in <tt>remap</tt> are
+   * cleared, and commands counted less than a tenth of the Row's total erased.
+   * 
+   * @param in the Row to prune and test
+   * @param remap per-row flags; <tt>0</tt> marks a row that is being dropped
+   * @return <tt>true</tt> if no Cell keeps a command or a reference, so the
+   *         Row should be dropped; <tt>false</tt> if the Row should remain
+   */
+  public boolean eat(Row in, int remap[]) {
+    int sum = 0;
+    for (Cell c : in.cells.values()) {
+      sum += c.cnt;
+      if (c.ref >= 0 && remap[c.ref] == 0) {
+        c.ref = -1;
+      }
+    }
+    // Integer division: a Row whose total count is below 10 gets frame == 0.
+    int frame = sum / 10;
+    boolean live = false;
+    for (Cell c : in.cells.values()) {
+      if (c.cnt < frame && c.cmd >= 0) {
+        c.cnt = 0;
+        c.cmd = -1;
+      }
+      if (c.cmd >= 0 || c.ref >= 0) {
+        live = true;
+      }
+    }
+    return !live;
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Gener.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java Mon May  3 12:44:22 2010
@@ -0,0 +1,147 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The Lift class is a data structure that is a variation of a Patricia trie.
+ * <p>
+ * Lift's <i>raison d'etre</i> is to implement reduction of the trie via the
+ * Lift-Up method, which makes the data structure less liable to overstemming.
+ */
+public class Lift extends Reduce {
+  boolean changeSkip;
+  
+  /**
+   * Constructor for the Lift object.
+   * 
+   * @param changeSkip when set to <tt>true</tt>, comparison of two Cells takes
+   *          a skip command into account
+   */
+  public Lift(boolean changeSkip) {
+    this.changeSkip = changeSkip;
+  }
+  
+  /**
+   * Optimize (eliminate rows with no content) the given Trie and return the
+   * reduced Trie.
+   * 
+   * @param orig the Trie to optimize
+   * @return the reduced Trie
+   */
+  @Override
+  public Trie optimize(Trie orig) {
+    List<CharSequence> cmds = orig.cmds;
+    List<Row> orows = orig.rows;
+    int remap[] = new int[orows.size()];
+    
+    // Rows are processed from the last index down to the first.
+    for (int j = orows.size() - 1; j >= 0; j--) {
+      liftUp(orows.get(j), orows);
+    }
+    
+    Arrays.fill(remap, -1);
+    List<Row> rows = removeGaps(orig.root, orows, new ArrayList<Row>(), remap);
+    
+    return new Trie(orig.forward, remap[orig.root], cmds, rows);
+  }
+  
+  /**
+   * Reduce the trie using Lift-Up reduction.
+   * <p>
+   * The Lift-Up reduction propagates all leaf-values (patch commands), where
+   * possible, to higher levels which are closer to the root of the trie. A
+   * Cell's reference is dropped in favour of the non-negative command returned
+   * by the referenced Row's <tt>uniformCmd</tt> when the Cell already carries
+   * that command or carries none. With <tt>changeSkip</tt> set, an equal
+   * command is lifted only if the Cell's skip is the Row's uniformSkip + 1.
+   * 
+   * @param in the Row to consider when optimizing
+   * @param nodes the rows of the trie, indexed by Cell reference
+   */
+  public void liftUp(Row in, List<Row> nodes) {
+    for (Cell c : in.cells.values()) {
+      if (c.ref < 0) {
+        continue;
+      }
+      Row to = nodes.get(c.ref);
+      int sum = to.uniformCmd(changeSkip);
+      if (sum < 0) {
+        continue;
+      }
+      if (sum == c.cmd) {
+        if (changeSkip) {
+          if (c.skip != to.uniformSkip + 1) {
+            continue;
+          }
+          // c.skip already equals to.uniformSkip + 1, nothing to assign
+        } else {
+          c.skip = 0;
+        }
+        c.cnt += to.uniformCnt;
+        c.ref = -1;
+      } else if (c.cmd < 0) {
+        c.cnt = to.uniformCnt;
+        c.cmd = sum;
+        c.ref = -1;
+        c.skip = changeSkip ? to.uniformSkip + 1 : 0;
+      }
+    }
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/Lift.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java?rev=940433&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java (added)
+++ lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java Mon May  3 12:44:22 2010
@@ -0,0 +1,208 @@
+/*
+                    Egothor Software License version 1.00
+                    Copyright (C) 1997-2004 Leo Galambos.
+                 Copyright (C) 2002-2004 "Egothor developers"
+                      on behalf of the Egothor Project.
+                             All rights reserved.
+
+   This  software  is  copyrighted  by  the "Egothor developers". If this
+   license applies to a single file or document, the "Egothor developers"
+   are the people or entities mentioned as copyright holders in that file
+   or  document.  If  this  license  applies  to the Egothor project as a
+   whole,  the  copyright holders are the people or entities mentioned in
+   the  file CREDITS. This file can be found in the same location as this
+   license in the distribution.
+
+   Redistribution  and  use  in  source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    1. Redistributions  of  source  code  must retain the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       following disclaimer.
+    2. Redistributions  in binary form must reproduce the above copyright
+       notice, the list of contributors, this list of conditions, and the
+       disclaimer  that  follows  these  conditions  in the documentation
+       and/or other materials provided with the distribution.
+    3. The name "Egothor" must not be used to endorse or promote products
+       derived  from  this software without prior written permission. For
+       written permission, please contact Leo.G@seznam.cz
+    4. Products  derived  from this software may not be called "Egothor",
+       nor  may  "Egothor"  appear  in  their name, without prior written
+       permission from Leo.G@seznam.cz.
+
+   In addition, we request that you include in the end-user documentation
+   provided  with  the  redistribution  and/or  in the software itself an
+   acknowledgement equivalent to the following:
+   "This product includes software developed by the Egothor Project.
+    http://egothor.sf.net/"
+
+   THIS  SOFTWARE  IS  PROVIDED  ``AS  IS''  AND ANY EXPRESSED OR IMPLIED
+   WARRANTIES,  INCLUDING,  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+   MERCHANTABILITY  AND  FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   IN  NO  EVENT  SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+   FOR   ANY   DIRECT,   INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
+   CONSEQUENTIAL  DAMAGES  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE  GOODS  OR  SERVICES;  LOSS  OF  USE,  DATA, OR PROFITS; OR
+   BUSINESS  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER  IN  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+   IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   This  software  consists  of  voluntary  contributions  made  by  many
+   individuals  on  behalf  of  the  Egothor  Project  and was originally
+   created by Leo Galambos (Leo.G@seznam.cz).
+ */
+package org.egothor.stemmer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The MultiTrie is a Trie of Tries. It stores words and their associated patch
+ * commands. The MultiTrie handles patch commands individually (each command by
+ * itself).
+ */
+public class MultiTrie extends Trie {
+  final char EOM = '*';
+  final String EOM_NODE = "" + EOM;
+  // one Trie per command piece; the Trie at index i holds piece i
+  List<Trie> tries = new ArrayList<Trie>();
+  // number of command characters stored per Trie
+  int BY = 1;
+  
+  /**
+   * Constructor for the MultiTrie object.
+   * 
+   * @param is the input stream
+   * @exception IOException if an I/O error occurs
+   */
+  public MultiTrie(DataInput is) throws IOException {
+    super(false);
+    forward = is.readBoolean();
+    BY = is.readInt();
+    for (int i = is.readInt(); i > 0; i--) {
+      tries.add(new Trie(is));
+    }
+  }
+  
+  /**
+   * Constructor for the MultiTrie object
+   * 
+   * @param forward set to <tt>true</tt> if the elements should be read left to
+   *          right
+   */
+  public MultiTrie(boolean forward) {
+    super(forward);
+  }
+  
+  /**
+   * Return the patch command for the given key, built by concatenating the
+   * <tt>getFully</tt> results of the Tries until one yields null or the marker.
+   * 
+   * @param key the key to the cell holding the desired element
+   * @return the element
+   */
+  @Override
+  public CharSequence getFully(CharSequence key) {
+    StringBuilder result = new StringBuilder(tries.size() * 2);
+    for (int i = 0; i < tries.size(); i++) {
+      CharSequence r = tries.get(i).getFully(key);
+      if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+        return result;
+      }
+      result.append(r);
+    }
+    return result;
+  }
+  
+  /**
+   * Return the element that is stored as last on a path belonging to the given
+   * key.
+   * 
+   * @param key the key associated with the desired element
+   * @return the element that is stored as last on a path
+   */
+  @Override
+  public CharSequence getLastOnPath(CharSequence key) {
+    StringBuilder result = new StringBuilder(tries.size() * 2);
+    for (int i = 0; i < tries.size(); i++) {
+      CharSequence r = tries.get(i).getLastOnPath(key);
+      if (r == null || (r.length() == 1 && r.charAt(0) == EOM)) {
+        return result;
+      }
+      result.append(r);
+    }
+    return result;
+  }
+  
+  /**
+   * Write this data structure to the given output stream.
+   * 
+   * @param os the output stream
+   * @exception IOException if an I/O error occurs
+   */
+  @Override
+  public void store(DataOutput os) throws IOException {
+    os.writeBoolean(forward);
+    os.writeInt(BY);
+    os.writeInt(tries.size());
+    for (Trie trie : tries)
+      trie.store(os);
+  }
+  
+  /**
+   * Add an element to this structure consisting of the given key and patch
+   * command. The command is cut into pieces of <tt>BY</tt> characters; piece
+   * <tt>i</tt> goes into the i-th Trie and an end marker into the next one.
+   * <p>
+   * This method returns without doing anything if <tt>cmd</tt> is empty.
+   * 
+   * @param key the key
+   * @param cmd the patch command
+   */
+  @Override
+  public void add(CharSequence key, CharSequence cmd) {
+    if (cmd.length() == 0) {
+      return;
+    }
+    int levels = cmd.length() / BY;
+    while (levels >= tries.size()) {
+      tries.add(new Trie(forward));
+    }
+    for (int i = 0; i < levels; i++) {
+      tries.get(i).add(key, cmd.subSequence(BY * i, BY * i + BY));
+    }
+    tries.get(levels).add(key, EOM_NODE);
+  }
+  
+  /**
+   * Apply the given reduction to every contained Trie.
+   * @param by the Reduce to apply to each contained Trie
+   * @return a new MultiTrie holding the reduced Tries, with the same BY
+   */
+  @Override
+  public Trie reduce(Reduce by) {
+    List<Trie> h = new ArrayList<Trie>(tries.size());
+    for (Trie trie : tries) {
+      h.add(trie.reduce(by));
+    }
+    MultiTrie m = new MultiTrie(forward);
+    m.BY = BY; // keep the piece width; a fresh MultiTrie defaults to 1
+    m.tries = h;
+    return m;
+  }
+  
+  /**
+   * Call <tt>printInfo</tt> on each contained Trie, passing the given prefix
+   * extended with the Trie's 1-based position in square brackets.
+   * @param prefix the prefix to extend and pass on to each contained Trie
+   */
+  @Override
+  public void printInfo(CharSequence prefix) {
+    int c = 0;
+    for (Trie trie : tries)
+      trie.printInfo(prefix + "[" + (++c) + "] ");
+  }
+}

Propchange: lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/egothor/stemmer/MultiTrie.java
------------------------------------------------------------------------------
    svn:eol-style = native