You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2013/11/20 12:47:08 UTC
svn commit: r1543793 [1/11] -
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/
Author: joern
Date: Wed Nov 20 11:47:08 2013
New Revision: 1543793
URL: http://svn.apache.org/r1543793
Log:
OPENNLP-572 Initial checking of the snowball stemmers
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/AbstractSnowballStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/Among.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballProgram.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/danishStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/dutchStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/englishStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/finnishStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/frenchStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/germanStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/hungarianStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/italianStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/norwegianStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/porterStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/portugueseStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/romanianStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/russianStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/spanishStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/swedishStemmer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/turkishStemmer.java
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/AbstractSnowballStemmer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/AbstractSnowballStemmer.java?rev=1543793&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/AbstractSnowballStemmer.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/AbstractSnowballStemmer.java Wed Nov 20 11:47:08 2013
@@ -0,0 +1,36 @@
+/*
+
+Copyright (c) 2001, Dr Martin Porter
+Copyright (c) 2002, Richard Boulton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holders nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+package opennlp.tools.stemmer.snowball;
+
+abstract class AbstractSnowballStemmer extends SnowballProgram { // common base type of the language-specific stemmers; SnowballStemmer holds its delegate through this type
+ public abstract boolean stem(); // runs the stemming program on the buffer set via setCurrent(); NOTE(review): the visible caller ignores the returned flag
+};
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/Among.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/Among.java?rev=1543793&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/Among.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/Among.java Wed Nov 20 11:47:08 2013
@@ -0,0 +1,62 @@
+/*
+
+Copyright (c) 2001, Dr Martin Porter
+Copyright (c) 2002, Richard Boulton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holders nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+package opennlp.tools.stemmer.snowball;
+
+import java.lang.reflect.Method;
+
+class Among { // one immutable entry of a string table searched by SnowballProgram.find_among / find_among_b
+ public Among (String s, int substring_i, int result,
+ String methodname, SnowballProgram methodobject) { // an empty methodname means the entry has no condition method
+ this.s_size = s.length();
+ this.s = s.toCharArray();
+ this.substring_i = substring_i;
+ this.result = result;
+ this.methodobject = methodobject;
+ if (methodname.length() == 0) {
+ this.method = null;
+ } else {
+ try {
+ this.method = methodobject.getClass().
+ getDeclaredMethod(methodname, new Class[0]); // resolves a no-argument method declared on methodobject's own class; NOTE(review): setAccessible is never called - confirm the target is invocable from SnowballProgram
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException(e); // an unknown method name fails when the table is built; the cause is preserved
+ }
+ }
+ }
+
+ public final int s_size; /* length of the search string */
+ public final char[] s; /* search string */
+ public final int substring_i; /* index to longest matching substring; find_among stops following these links at a negative value */
+ public final int result; /* result of the lookup */
+ public final Method method; /* method to use if substring matches; null when no method name was given */
+ public final SnowballProgram methodobject; /* object to invoke method on */
+};
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballProgram.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballProgram.java?rev=1543793&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballProgram.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballProgram.java Wed Nov 20 11:47:08 2013
@@ -0,0 +1,462 @@
+/*
+
+Copyright (c) 2001, Dr Martin Porter
+Copyright (c) 2002, Richard Boulton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holders nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+package opennlp.tools.stemmer.snowball;
+import java.lang.reflect.InvocationTargetException;
+
+class SnowballProgram {
+ protected SnowballProgram() // creates a program whose working buffer is empty
+ {
+ current = new StringBuffer();
+ setCurrent(""); // initialises cursor, both limits and the slice marks for the empty buffer
+ }
+
+ /**
+ * Set the current string and reset the cursor, both limits and the slice marks so that they span it.
+ */
+ public void setCurrent(String value)
+ {
+ current.replace(0, current.length(), value); // overwrite the entire buffer contents with value
+ cursor = 0;
+ limit = current.length();
+ limit_backward = 0;
+ bra = cursor; // slice start = beginning of the string
+ ket = limit; // slice end = end of the string
+ }
+
+ /**
+ * Get the current string; as a side effect the internal buffer is replaced by a new, empty one.
+ */
+ public String getCurrent()
+ {
+ String result = current.toString();
+ // Make a new StringBuffer. If we reuse the old one, and a user of
+ // the library keeps a reference to the buffer returned (for example,
+ // by converting it to a String in a way which doesn't force a copy),
+ // the buffer size will not decrease, and we will risk wasting a large
+ // amount of memory.
+ // Thanks to Wolfram Esser for spotting this problem.
+ current = new StringBuffer(); // NOTE(review): cursor, limit, limit_backward, bra and ket are not reset here; presumably callers invoke setCurrent() before reusing the program
+ return result;
+ }
+
+ // current string (the working buffer that all operations below read and modify)
+ protected StringBuffer current;
+
+ protected int cursor; // position in current that the matching operations test at and move
+ protected int limit; // forward operations return false once cursor >= limit
+ protected int limit_backward; // backward (_b) operations return false once cursor <= limit_backward
+ protected int bra; // start of the slice replaced by slice_from / copied by slice_to
+ protected int ket; // end (exclusive) of that slice
+
+ protected void copy_from(SnowballProgram other) // takes over the complete state of other
+ {
+ current = other.current; // the buffer reference is shared, not copied, so both programs see later edits
+ cursor = other.cursor;
+ limit = other.limit;
+ limit_backward = other.limit_backward;
+ bra = other.bra;
+ ket = other.ket;
+ }
+
+ protected boolean in_grouping(char [] s, int min, int max) // steps the cursor forward over one char if it lies in [min, max] and its bit is set in bitmap s
+ {
+ if (cursor >= limit) return false; // no char left in the forward direction
+ char ch = current.charAt(cursor);
+ if (ch > max || ch < min) return false;
+ ch -= min; // offset of ch from the start of the range = bit index into s
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; // each element of s carries 8 membership bits
+ cursor++;
+ return true;
+ }
+
+ protected boolean in_grouping_b(char [] s, int min, int max) // backward variant: tests the char before the cursor and steps the cursor back on success
+ {
+ if (cursor <= limit_backward) return false; // no char left in the backward direction
+ char ch = current.charAt(cursor - 1);
+ if (ch > max || ch < min) return false;
+ ch -= min; // offset of ch from the start of the range = bit index into s
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; // each element of s carries 8 membership bits
+ cursor--;
+ return true;
+ }
+
+ protected boolean out_grouping(char [] s, int min, int max) // steps the cursor forward over one char that is NOT in the group described by s and [min, max]
+ {
+ if (cursor >= limit) return false; // no char left in the forward direction
+ char ch = current.charAt(cursor);
+ if (ch > max || ch < min) { // outside the range covered by the bitmap, hence not in the group
+ cursor++;
+ return true;
+ }
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { // inside the range but the membership bit is clear
+ cursor ++;
+ return true;
+ }
+ return false; // the char belongs to the group; cursor is left untouched
+ }
+
+ protected boolean out_grouping_b(char [] s, int min, int max) // backward variant: steps the cursor back over one char that is NOT in the group
+ {
+ if (cursor <= limit_backward) return false; // no char left in the backward direction
+ char ch = current.charAt(cursor - 1);
+ if (ch > max || ch < min) { // outside the range covered by the bitmap, hence not in the group
+ cursor--;
+ return true;
+ }
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { // inside the range but the membership bit is clear
+ cursor--;
+ return true;
+ }
+ return false; // the char belongs to the group; cursor is left untouched
+ }
+
+ protected boolean in_range(int min, int max) // steps the cursor forward over one char whose code lies in [min, max]
+ {
+ if (cursor >= limit) return false; // no char left in the forward direction
+ char ch = current.charAt(cursor);
+ if (ch > max || ch < min) return false;
+ cursor++;
+ return true;
+ }
+
+ protected boolean in_range_b(int min, int max) // backward variant: steps the cursor back over one char whose code lies in [min, max]
+ {
+ if (cursor <= limit_backward) return false; // no char left in the backward direction
+ char ch = current.charAt(cursor - 1);
+ if (ch > max || ch < min) return false;
+ cursor--;
+ return true;
+ }
+
+ protected boolean out_range(int min, int max) // steps the cursor forward over one char whose code lies outside [min, max]
+ {
+ if (cursor >= limit) return false; // no char left in the forward direction
+ char ch = current.charAt(cursor);
+ if (!(ch > max || ch < min)) return false; // char is inside the range, so no match
+ cursor++;
+ return true;
+ }
+
+ protected boolean out_range_b(int min, int max) // backward variant: steps the cursor back over one char whose code lies outside [min, max]
+ {
+ if (cursor <= limit_backward) return false; // no char left in the backward direction
+ char ch = current.charAt(cursor - 1);
+ if(!(ch > max || ch < min)) return false; // char is inside the range, so no match
+ cursor--;
+ return true;
+ }
+
+ protected boolean eq_s(int s_size, String s) // matches the first s_size chars of s starting at the cursor; on success the cursor moves past them
+ {
+ if (limit - cursor < s_size) return false; // not enough chars left before limit
+ int i;
+ for (i = 0; i != s_size; i++) {
+ if (current.charAt(cursor + i) != s.charAt(i)) return false;
+ }
+ cursor += s_size;
+ return true;
+ }
+
+ protected boolean eq_s_b(int s_size, String s) // matches the first s_size chars of s ending at the cursor; on success the cursor moves back to their start
+ {
+ if (cursor - limit_backward < s_size) return false; // not enough chars left after limit_backward
+ int i;
+ for (i = 0; i != s_size; i++) {
+ if (current.charAt(cursor - s_size + i) != s.charAt(i)) return false;
+ }
+ cursor -= s_size;
+ return true;
+ }
+
+ protected boolean eq_v(CharSequence s) // forward match of the whole contents of s at the cursor; delegates to eq_s
+ {
+ return eq_s(s.length(), s.toString());
+ }
+
+ protected boolean eq_v_b(CharSequence s) // backward match of the whole contents of s ending at the cursor; delegates to eq_s_b
+ { return eq_s_b(s.length(), s.toString());
+ }
+
+ protected int find_among(Among v[], int v_size) // searches the first v_size entries of v for one matching at the cursor (forward); returns its result, or 0 if none matches
+ {
+ int i = 0; // lower bound of the bisection (inclusive)
+ int j = v_size; // upper bound of the bisection (exclusive)
+
+ int c = cursor;
+ int l = limit;
+
+ int common_i = 0; // number of chars found to match against the entry at bound i
+ int common_j = 0; // number of chars found to match against the entry at bound j
+
+ boolean first_key_inspected = false;
+
+ while(true) { // bisection; presumably relies on v being ordered by its search strings - TODO confirm against the generated tables
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j; // smaller of the two, used as the starting offset of the comparison
+ Among w = v[k];
+ int i2;
+ for (i2 = common; i2 < w.s_size; i2++) {
+ if (c + common == l) { // text exhausted before the entry: handled like "text sorts before entry"
+ diff = -1;
+ break;
+ }
+ diff = current.charAt(c + common) - w.s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ if (diff < 0) {
+ j = k;
+ common_j = common;
+ } else {
+ i = k;
+ common_i = common;
+ }
+ if (j - i <= 1) {
+ if (i > 0) break; // v->s has been inspected
+ if (j == i) break; // only one item in v
+
+ // - but now we need to go round once more to get
+ // v->s inspected. This looks messy, but is actually
+ // the optimal approach.
+
+ if (first_key_inspected) break;
+ first_key_inspected = true;
+ }
+ }
+ while(true) { // walk from the candidate along substring_i links until an entry is accepted
+ Among w = v[i];
+ if (common_i >= w.s_size) { // the whole search string of w matched
+ cursor = c + w.s_size;
+ if (w.method == null) return w.result;
+ boolean res;
+ try {
+ Object resobj = w.method.invoke(w.methodobject,
+ new Object[0]);
+ res = resobj.toString().equals("true"); // the condition method's return value is read through its string form
+ } catch (InvocationTargetException e) {
+ res = false; // NOTE(review): an exception thrown by the condition method is swallowed and treated as "condition false"
+ // FIXME - debug message
+ } catch (IllegalAccessException e) {
+ res = false; // NOTE(review): an inaccessible condition method is likewise silently treated as "condition false"
+ // FIXME - debug message
+ }
+ cursor = c + w.s_size; // place the cursor after the match again once the condition method has run
+ if (res) return w.result;
+ }
+ i = w.substring_i;
+ if (i < 0) return 0; // no further fallback entry: nothing matched
+ }
+ }
+
+ // find_among_b is for backwards processing: entries are compared from their last char towards their first against the text ending at the cursor. Same comments apply
+ protected int find_among_b(Among v[], int v_size) // returns the result of the accepted entry, or 0 if none matches
+ {
+ int i = 0; // lower bound of the bisection (inclusive)
+ int j = v_size; // upper bound of the bisection (exclusive)
+
+ int c = cursor;
+ int lb = limit_backward;
+
+ int common_i = 0; // number of chars found to match against the entry at bound i
+ int common_j = 0; // number of chars found to match against the entry at bound j
+
+ boolean first_key_inspected = false;
+
+ while(true) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j; // smaller of the two, used as the starting offset of the comparison
+ Among w = v[k];
+ int i2;
+ for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
+ if (c - common == lb) { // text exhausted before the entry: handled like "text sorts before entry"
+ diff = -1;
+ break;
+ }
+ diff = current.charAt(c - 1 - common) - w.s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ if (diff < 0) {
+ j = k;
+ common_j = common;
+ } else {
+ i = k;
+ common_i = common;
+ }
+ if (j - i <= 1) {
+ if (i > 0) break;
+ if (j == i) break;
+ if (first_key_inspected) break; // entry 0 gets exactly one extra inspection pass
+ first_key_inspected = true;
+ }
+ }
+ while(true) { // walk from the candidate along substring_i links until an entry is accepted
+ Among w = v[i];
+ if (common_i >= w.s_size) { // the whole search string of w matched
+ cursor = c - w.s_size;
+ if (w.method == null) return w.result;
+
+ boolean res;
+ try {
+ Object resobj = w.method.invoke(w.methodobject,
+ new Object[0]);
+ res = resobj.toString().equals("true"); // the condition method's return value is read through its string form
+ } catch (InvocationTargetException e) {
+ res = false; // NOTE(review): an exception thrown by the condition method is swallowed and treated as "condition false"
+ // FIXME - debug message
+ } catch (IllegalAccessException e) {
+ res = false; // NOTE(review): an inaccessible condition method is likewise silently treated as "condition false"
+ // FIXME - debug message
+ }
+ cursor = c - w.s_size; // place the cursor before the match again once the condition method has run
+ if (res) return w.result;
+ }
+ i = w.substring_i;
+ if (i < 0) return 0; // no further fallback entry: nothing matched
+ }
+ }
+
+ /* to replace chars between c_bra and c_ket in current by the
+ * chars in s. Returns the resulting change in length (negative when the text shrinks).
+ */
+ protected int replace_s(int c_bra, int c_ket, String s)
+ {
+ int adjustment = s.length() - (c_ket - c_bra);
+ current.replace(c_bra, c_ket, s);
+ limit += adjustment;
+ if (cursor >= c_ket) cursor += adjustment; // cursor at or behind the replaced span moves with the text
+ else if (cursor > c_bra) cursor = c_bra; // cursor inside the replaced span is pulled to its start
+ return adjustment;
+ }
+
+ protected void slice_check() // verifies 0 <= bra <= ket <= limit <= current.length(); NOTE(review): a violation is only printed to stderr, callers continue regardless
+ {
+ if (bra < 0 ||
+ bra > ket ||
+ ket > limit ||
+ limit > current.length()) // this line could be removed
+ {
+ System.err.println("faulty slice operation");
+ // FIXME: report error somehow.
+ /*
+ fprintf(stderr, "faulty slice operation:\n");
+ debug(z, -1, 0);
+ exit(1);
+ */
+ }
+ }
+
+ protected void slice_from(String s) // replaces the slice [bra, ket) of the current string by s
+ {
+ slice_check();
+ replace_s(bra, ket, s); // the returned length change is not needed here; bra and ket are left as they are
+ }
+
+ protected void slice_from(CharSequence s) // CharSequence overload; converts s to a String and delegates
+ {
+ slice_from(s.toString());
+ }
+
+ protected void slice_del() // deletes the slice [bra, ket) by replacing it with the empty string
+ {
+ slice_from("");
+ }
+
+ protected void insert(int c_bra, int c_ket, String s) // replaces [c_bra, c_ket) by s and keeps the slice marks aligned with the shifted text
+ {
+ int adjustment = replace_s(c_bra, c_ket, s);
+ if (c_bra <= bra) bra += adjustment; // slice start lies at or behind the edit position
+ if (c_bra <= ket) ket += adjustment; // slice end lies at or behind the edit position
+ }
+
+ protected void insert(int c_bra, int c_ket, CharSequence s) // CharSequence overload; converts s to a String and delegates
+ {
+ insert(c_bra, c_ket, s.toString());
+ }
+
+ /* Copy the slice into the supplied StringBuffer, overwriting its previous contents; the same buffer is returned */
+ protected StringBuffer slice_to(StringBuffer s)
+ {
+ slice_check();
+ int len = ket - bra; // NOTE(review): len is never read
+ s.replace(0, s.length(), current.substring(bra, ket));
+ return s;
+ }
+
+ /* Copy the slice into the supplied StringBuilder, overwriting its previous contents; the same builder is returned */
+ protected StringBuilder slice_to(StringBuilder s)
+ {
+ slice_check();
+ int len = ket - bra; // NOTE(review): len is never read
+ s.replace(0, s.length(), current.substring(bra, ket));
+ return s;
+ }
+
+ protected StringBuffer assign_to(StringBuffer s) // overwrites s with the current string up to limit and returns s
+ {
+ s.replace(0, s.length(), current.substring(0, limit));
+ return s;
+ }
+
+ protected StringBuilder assign_to(StringBuilder s) // overwrites s with the current string up to limit and returns s
+ {
+ s.replace(0, s.length(), current.substring(0, limit));
+ return s;
+ }
+
+/*
+extern void debug(struct SN_env * z, int number, int line_count)
+{ int i;
+ int limit = SIZE(z->p);
+ //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
+ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+ for (i = 0; i <= limit; i++)
+ { if (z->lb == i) printf("{");
+ if (z->bra == i) printf("[");
+ if (z->c == i) printf("|");
+ if (z->ket == i) printf("]");
+ if (z->l == i) printf("}");
+ if (i < limit)
+ { int ch = z->p[i];
+ if (ch == 0) ch = '#';
+ printf("%c", ch);
+ }
+ }
+ printf("'\n");
+}
+*/
+
+};
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballStemmer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballStemmer.java?rev=1543793&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballStemmer.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/SnowballStemmer.java Wed Nov 20 11:47:08 2013
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.stemmer.snowball;
+
+import opennlp.tools.stemmer.Stemmer;
+
+public class SnowballStemmer implements Stemmer { // Stemmer implementation that delegates to one of the Snowball stemmer classes of this package
+
+ public enum ALGORITHM { // selects which stemmer class the constructor instantiates
+ DANISH,
+ DUTCH,
+ ENGLISH,
+ FINNISH,
+ FRENCH,
+ GERMAN,
+ HUNGARIAN,
+ ITALIAN,
+ NORWEGIAN,
+ PORTER,
+ PORTUGUESE,
+ ROMANIAN,
+ RUSSIAN,
+ SPANISH,
+ SWEDISH,
+ TURKISH
+ }
+
+ private final AbstractSnowballStemmer stemmer; // delegate; it keeps the word being stemmed as mutable state, so calls on one instance presumably must not overlap
+ private final int repeat; // number of times stem() is applied to each word
+
+ public SnowballStemmer(ALGORITHM algorithm, int repeat) { // creates the delegate for algorithm; repeat is stored unchecked
+ this.repeat = repeat;
+
+ if (ALGORITHM.DANISH.equals(algorithm)) {
+ stemmer = new danishStemmer();
+ }
+ else if (ALGORITHM.DUTCH.equals(algorithm)) {
+ stemmer = new dutchStemmer();
+ }
+ else if (ALGORITHM.ENGLISH.equals(algorithm)) {
+ stemmer = new englishStemmer();
+ }
+ else if (ALGORITHM.FINNISH.equals(algorithm)) {
+ stemmer = new finnishStemmer();
+ }
+ else if (ALGORITHM.FRENCH.equals(algorithm)) {
+ stemmer = new frenchStemmer();
+ }
+ else if (ALGORITHM.GERMAN.equals(algorithm)) {
+ stemmer = new germanStemmer();
+ }
+ else if (ALGORITHM.HUNGARIAN.equals(algorithm)) {
+ stemmer = new hungarianStemmer();
+ }
+ else if (ALGORITHM.ITALIAN.equals(algorithm)) {
+ stemmer = new italianStemmer();
+ }
+ else if (ALGORITHM.NORWEGIAN.equals(algorithm)) {
+ stemmer = new norwegianStemmer();
+ }
+ else if (ALGORITHM.PORTER.equals(algorithm)) {
+ stemmer = new porterStemmer();
+ }
+ else if (ALGORITHM.PORTUGUESE.equals(algorithm)) {
+ stemmer = new portugueseStemmer();
+ }
+ else if (ALGORITHM.ROMANIAN.equals(algorithm)) {
+ stemmer = new romanianStemmer();
+ }
+ else if (ALGORITHM.RUSSIAN.equals(algorithm)) {
+ stemmer = new russianStemmer();
+ }
+ else if (ALGORITHM.SPANISH.equals(algorithm)) {
+ stemmer = new spanishStemmer();
+ }
+ else if (ALGORITHM.SWEDISH.equals(algorithm)) {
+ stemmer = new swedishStemmer();
+ }
+ else if (ALGORITHM.TURKISH.equals(algorithm)) {
+ stemmer = new turkishStemmer();
+ }
+ else {
+ throw new IllegalStateException("Unexpected stemmer algorithm: " + algorithm.toString()); // NOTE(review): a null algorithm also ends up here and algorithm.toString() then throws NullPointerException instead
+ }
+ }
+
+ public SnowballStemmer(ALGORITHM algorithm) { // convenience constructor: applies the stemmer once per word
+ this(algorithm, 1);
+ }
+
+ public CharSequence stem(CharSequence word) { // returns the delegate's buffer contents after applying stem() repeat times to word
+
+ stemmer.setCurrent(word.toString());
+
+ for (int i = 0; i < repeat; i++) { // with repeat <= 0 the loop body never runs and the word comes back unchanged
+ stemmer.stem(); // the boolean result is ignored
+ }
+
+ return stemmer.getCurrent();
+ }
+}
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/danishStemmer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/danishStemmer.java?rev=1543793&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/danishStemmer.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/danishStemmer.java Wed Nov 20 11:47:08 2013
@@ -0,0 +1,469 @@
+/*
+
+Copyright (c) 2001, Dr Martin Porter
+Copyright (c) 2002, Richard Boulton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holders nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+// This file was generated automatically by the Snowball to Java compiler
+
+package opennlp.tools.stemmer.snowball;
+
+
+ /**
+ * This class was automatically generated by a Snowball to Java compiler
+ * It implements the stemming algorithm defined by a snowball script.
+ */
+
+class danishStemmer extends opennlp.tools.stemmer.snowball.AbstractSnowballStemmer {
+
+private static final long serialVersionUID = 1L; // NOTE(review): no Serializable supertype is visible in this chunk - confirm whether this field serves any purpose
+
+ private final static danishStemmer methodObject = new danishStemmer (); // shared instance passed as methodobject to every Among entry of the tables below
+
+ private final static Among a_0[] = { // endings looked up by r_main_suffix; columns: search string, substring_i link, result code, condition method name (empty = none), method object
+ new Among ( "hed", -1, 1, "", methodObject ),
+ new Among ( "ethed", 0, 1, "", methodObject ),
+ new Among ( "ered", -1, 1, "", methodObject ),
+ new Among ( "e", -1, 1, "", methodObject ),
+ new Among ( "erede", 3, 1, "", methodObject ),
+ new Among ( "ende", 3, 1, "", methodObject ),
+ new Among ( "erende", 5, 1, "", methodObject ),
+ new Among ( "ene", 3, 1, "", methodObject ),
+ new Among ( "erne", 3, 1, "", methodObject ),
+ new Among ( "ere", 3, 1, "", methodObject ),
+ new Among ( "en", -1, 1, "", methodObject ),
+ new Among ( "heden", 10, 1, "", methodObject ),
+ new Among ( "eren", 10, 1, "", methodObject ),
+ new Among ( "er", -1, 1, "", methodObject ),
+ new Among ( "heder", 13, 1, "", methodObject ),
+ new Among ( "erer", 13, 1, "", methodObject ),
+ new Among ( "s", -1, 2, "", methodObject ), // the only entry with result 2: r_main_suffix additionally checks the preceding char against g_s_ending
+ new Among ( "heds", 16, 1, "", methodObject ),
+ new Among ( "es", 16, 1, "", methodObject ),
+ new Among ( "endes", 18, 1, "", methodObject ),
+ new Among ( "erendes", 19, 1, "", methodObject ),
+ new Among ( "enes", 18, 1, "", methodObject ),
+ new Among ( "ernes", 18, 1, "", methodObject ),
+ new Among ( "eres", 18, 1, "", methodObject ),
+ new Among ( "ens", 16, 1, "", methodObject ),
+ new Among ( "hedens", 24, 1, "", methodObject ),
+ new Among ( "erens", 24, 1, "", methodObject ),
+ new Among ( "ers", 16, 1, "", methodObject ),
+ new Among ( "ets", 16, 1, "", methodObject ),
+ new Among ( "erets", 28, 1, "", methodObject ),
+ new Among ( "et", -1, 1, "", methodObject ),
+ new Among ( "eret", 30, 1, "", methodObject )
+ };
+
+ private final static Among a_1[] = { // consonant pairs tested by r_consonant_pair; the result -1 is only compared against 0 there
+ new Among ( "gd", -1, -1, "", methodObject ),
+ new Among ( "dt", -1, -1, "", methodObject ),
+ new Among ( "gt", -1, -1, "", methodObject ),
+ new Among ( "kt", -1, -1, "", methodObject )
+ };
+
+ private final static Among a_2[] = { // endings handled by r_other_suffix: result 1 is deleted, result 2 is replaced
+ new Among ( "ig", -1, 1, "", methodObject ),
+ new Among ( "lig", 0, 1, "", methodObject ),
+ new Among ( "elig", 1, 1, "", methodObject ),
+ new Among ( "els", -1, 1, "", methodObject ),
+ new Among ( "l\u00F8st", -1, 2, "", methodObject )
+ };
+
+ private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; // membership bitmap used with range 97..248; the set bits decode to a e i o u y \u00E5 \u00E6 \u00F8
+
+ private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; // membership bitmap used with range 97..229; the set bits decode to a b c d f g h j k l m n o p r t v y z \u00E5
+
+ private int I_x; // position three chars into the word, set by r_mark_regions
+ private int I_p1; // start of the region in which the suffix routines are allowed to work, set by r_mark_regions
+ private java.lang.StringBuilder S_ch = new java.lang.StringBuilder(); // receives the sliced character in r_undouble
+
+ private void copy_from(danishStemmer other) { // copies the stemmer-specific variables, then the base program state
+ I_x = other.I_x;
+ I_p1 = other.I_p1;
+ S_ch = other.S_ch; // the StringBuilder reference is shared, not copied
+ super.copy_from(other);
+ }
+
+ private boolean r_mark_regions() { // computes I_x and I_p1; returns false when the word is too short or has no vowel followed by a non-vowel
+ int v_1;
+ int v_2;
+ // (, line 29
+ I_p1 = limit; // default: the region starts at the end of the word, i.e. it is empty
+ // test, line 33
+ v_1 = cursor;
+ // (, line 33
+ // hop, line 33
+ {
+ int c = cursor + 3;
+ if (0 > c || c > limit)
+ {
+ return false; // fewer than three chars remain after the cursor
+ }
+ cursor = c;
+ }
+ // setmark x, line 33
+ I_x = cursor;
+ cursor = v_1; // the test construct restores the cursor
+ // goto, line 34
+ golab0: while(true) // advance until the cursor stands in front of a char of g_v
+ {
+ v_2 = cursor;
+ lab1: do {
+ if (!(in_grouping(g_v, 97, 248)))
+ {
+ break lab1;
+ }
+ cursor = v_2; // goto leaves the cursor before the matched char
+ break golab0;
+ } while (false);
+ cursor = v_2;
+ if (cursor >= limit)
+ {
+ return false;
+ }
+ cursor++;
+ }
+ // gopast, line 34
+ golab2: while(true) // advance until one char outside g_v has been consumed
+ {
+ lab3: do {
+ if (!(out_grouping(g_v, 97, 248)))
+ {
+ break lab3;
+ }
+ break golab2;
+ } while (false);
+ if (cursor >= limit)
+ {
+ return false;
+ }
+ cursor++;
+ }
+ // setmark p1, line 34
+ I_p1 = cursor;
+ // try, line 35
+ lab4: do {
+ // (, line 35
+ if (!(I_p1 < I_x))
+ {
+ break lab4;
+ }
+ I_p1 = I_x; // the region never starts before I_x
+ } while (false);
+ return true;
+ }
+
+ private boolean r_main_suffix() { // deletes an ending from table a_0 that lies inside the region starting at I_p1; returns false if nothing was removed
+ int among_var;
+ int v_1;
+ int v_2;
+ // (, line 40
+ // setlimit, line 41
+ v_1 = limit - cursor; // cursor is remembered as a distance from the end
+ // tomark, line 41
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_2 = limit_backward;
+ limit_backward = cursor; // backward matching may not reach in front of I_p1
+ cursor = limit - v_1;
+ // (, line 41
+ // [, line 41
+ ket = cursor;
+ // substring, line 41
+ among_var = find_among_b(a_0, 32);
+ if (among_var == 0)
+ {
+ limit_backward = v_2; // restore the backward limit before failing
+ return false;
+ }
+ // ], line 41
+ bra = cursor;
+ limit_backward = v_2;
+ switch(among_var) {
+ case 0:
+ return false;
+ case 1:
+ // (, line 48
+ // delete, line 48
+ slice_del();
+ break;
+ case 2:
+ // (, line 50
+ if (!(in_grouping_b(g_s_ending, 97, 229))) // result 2 is only deleted when the char before it belongs to g_s_ending
+ {
+ return false;
+ }
+ // delete, line 50
+ slice_del();
+ break;
+ }
+ return true;
+ }
+
+ private boolean r_consonant_pair() { // if the text before the cursor ends in an entry of a_1 inside the region, deletes the single char directly before the cursor
+ int v_1;
+ int v_2;
+ int v_3;
+ // (, line 54
+ // test, line 55
+ v_1 = limit - cursor;
+ // (, line 55
+ // setlimit, line 56
+ v_2 = limit - cursor;
+ // tomark, line 56
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_3 = limit_backward;
+ limit_backward = cursor; // backward matching may not reach in front of I_p1
+ cursor = limit - v_2;
+ // (, line 56
+ // [, line 56
+ ket = cursor;
+ // substring, line 56
+ if (find_among_b(a_1, 4) == 0)
+ {
+ limit_backward = v_3; // restore the backward limit before failing
+ return false;
+ }
+ // ], line 56
+ bra = cursor;
+ limit_backward = v_3;
+ cursor = limit - v_1; // the test construct restores the cursor
+ // next, line 62
+ if (cursor <= limit_backward)
+ {
+ return false;
+ }
+ cursor--;
+ // ], line 62
+ bra = cursor; // the slice now covers exactly one char, ending at ket
+ // delete, line 62
+ slice_del();
+ return true;
+ }
+
+ private boolean r_other_suffix() { // first drops a trailing "st" that follows "ig", then handles an ending from table a_2 inside the region
+ int among_var;
+ int v_1;
+ int v_2;
+ int v_3;
+ int v_4;
+ // (, line 65
+ // do, line 66
+ v_1 = limit - cursor;
+ lab0: do {
+ // (, line 66
+ // [, line 66
+ ket = cursor;
+ // literal, line 66
+ if (!(eq_s_b(2, "st")))
+ {
+ break lab0;
+ }
+ // ], line 66
+ bra = cursor; // the slice covers only "st"
+ // literal, line 66
+ if (!(eq_s_b(2, "ig")))
+ {
+ break lab0;
+ }
+ // delete, line 66
+ slice_del();
+ } while (false);
+ cursor = limit - v_1; // the do construct restores the cursor whether or not the body succeeded
+ // setlimit, line 67
+ v_2 = limit - cursor;
+ // tomark, line 67
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_3 = limit_backward;
+ limit_backward = cursor; // backward matching may not reach in front of I_p1
+ cursor = limit - v_2;
+ // (, line 67
+ // [, line 67
+ ket = cursor;
+ // substring, line 67
+ among_var = find_among_b(a_2, 5);
+ if (among_var == 0)
+ {
+ limit_backward = v_3; // restore the backward limit before failing
+ return false;
+ }
+ // ], line 67
+ bra = cursor;
+ limit_backward = v_3;
+ switch(among_var) {
+ case 0:
+ return false;
+ case 1:
+ // (, line 70
+ // delete, line 70
+ slice_del();
+ // do, line 70
+ v_4 = limit - cursor;
+ lab1: do {
+ // call consonant_pair, line 70
+ if (!r_consonant_pair())
+ {
+ break lab1;
+ }
+ } while (false);
+ cursor = limit - v_4;
+ break;
+ case 2:
+ // (, line 72
+ // <-, line 72
+ slice_from("l\u00F8s"); // replaces the matched ending by the same text without its final "t"
+ break;
+ }
+ return true;
+ }
+
+ private boolean r_undouble() { // inside the region: if the char before the cursor is outside g_v and is preceded by the same char, deletes it
+ int v_1;
+ int v_2;
+ // (, line 75
+ // setlimit, line 76
+ v_1 = limit - cursor;
+ // tomark, line 76
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_2 = limit_backward;
+ limit_backward = cursor; // backward matching may not reach in front of I_p1
+ cursor = limit - v_1;
+ // (, line 76
+ // [, line 76
+ ket = cursor;
+ if (!(out_grouping_b(g_v, 97, 248)))
+ {
+ limit_backward = v_2; // restore the backward limit before failing
+ return false;
+ }
+ // ], line 76
+ bra = cursor;
+ // -> ch, line 76
+ S_ch = slice_to(S_ch); // S_ch now holds the single char that was just stepped over
+ limit_backward = v_2;
+ // name ch, line 77
+ if (!(eq_v_b(S_ch))) // the text before that char must end with the same char
+ {
+ return false;
+ }
+ // delete, line 78
+ slice_del(); // removes the slice [bra, ket), i.e. the second of the two equal chars
+ return true;
+ }
+
+ public boolean stem() { // marks the regions, then runs the four suffix routines backwards from the end of the word; always returns true
+ int v_1;
+ int v_2;
+ int v_3;
+ int v_4;
+ int v_5;
+ // (, line 82
+ // do, line 84
+ v_1 = cursor;
+ lab0: do {
+ // call mark_regions, line 84
+ if (!r_mark_regions())
+ {
+ break lab0;
+ }
+ } while (false);
+ cursor = v_1; // each do construct restores the cursor and ignores a failure of the called routine
+ // backwards, line 85
+ limit_backward = cursor; cursor = limit; // switch to backward mode: start at the end of the word
+ // (, line 85
+ // do, line 86
+ v_2 = limit - cursor; // positions are kept as distances from the end because the routines may change the length
+ lab1: do {
+ // call main_suffix, line 86
+ if (!r_main_suffix())
+ {
+ break lab1;
+ }
+ } while (false);
+ cursor = limit - v_2;
+ // do, line 87
+ v_3 = limit - cursor;
+ lab2: do {
+ // call consonant_pair, line 87
+ if (!r_consonant_pair())
+ {
+ break lab2;
+ }
+ } while (false);
+ cursor = limit - v_3;
+ // do, line 88
+ v_4 = limit - cursor;
+ lab3: do {
+ // call other_suffix, line 88
+ if (!r_other_suffix())
+ {
+ break lab3;
+ }
+ } while (false);
+ cursor = limit - v_4;
+ // do, line 89
+ v_5 = limit - cursor;
+ lab4: do {
+ // call undouble, line 89
+ if (!r_undouble())
+ {
+ break lab4;
+ }
+ } while (false);
+ cursor = limit - v_5;
+ cursor = limit_backward; return true; // leave backward mode with the cursor back at the backward limit
+ }
+
+ public boolean equals( Object o ) { // every danishStemmer is equal to every other one; instance state is not compared
+ return o instanceof danishStemmer;
+ }
+
+ public int hashCode() { // constant per class, consistent with equals() above
+ return danishStemmer.class.getName().hashCode();
+ }
+
+
+
+}
+
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/dutchStemmer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/dutchStemmer.java?rev=1543793&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/dutchStemmer.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/stemmer/snowball/dutchStemmer.java Wed Nov 20 11:47:08 2013
@@ -0,0 +1,883 @@
+/*
+
+Copyright (c) 2001, Dr Martin Porter
+Copyright (c) 2002, Richard Boulton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holders nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+// This file was generated automatically by the Snowball to Java compiler
+
+package opennlp.tools.stemmer.snowball;
+
+
+ /**
+ * Dutch stemmer. This class was automatically generated by a Snowball to Java
+ * compiler; it implements the stemming algorithm defined by a Snowball script.
+ */
+
+class dutchStemmer extends opennlp.tools.stemmer.snowball.AbstractSnowballStemmer {
+
+// Serial version identifier.
+private static final long serialVersionUID = 1L;
+
+ // Shared instance passed as the last constructor argument of every Among entry below.
+ private final static dutchStemmer methodObject = new dutchStemmer ();
+
+ // Table searched by r_prelude (find_among(a_0, 11)): a, e, i, o, u carrying an
+ // acute accent or a diaeresis, plus the empty string. The third constructor
+ // argument lines up with the case labels r_prelude switches on:
+ // 1 -> "a", 2 -> "e", 3 -> "i", 4 -> "o", 5 -> "u", 6 -> step over one character.
+ private final static Among a_0[] = {
+ new Among ( "", -1, 6, "", methodObject ),
+ new Among ( "\u00E1", 0, 1, "", methodObject ),
+ new Among ( "\u00E4", 0, 1, "", methodObject ),
+ new Among ( "\u00E9", 0, 2, "", methodObject ),
+ new Among ( "\u00EB", 0, 2, "", methodObject ),
+ new Among ( "\u00ED", 0, 3, "", methodObject ),
+ new Among ( "\u00EF", 0, 3, "", methodObject ),
+ new Among ( "\u00F3", 0, 4, "", methodObject ),
+ new Among ( "\u00F6", 0, 4, "", methodObject ),
+ new Among ( "\u00FA", 0, 5, "", methodObject ),
+ new Among ( "\u00FC", 0, 5, "", methodObject )
+ };
+
+ // Table searched by r_postlude (find_among(a_1, 3)). Matching the case labels
+ // there: 1 ("Y") -> "y", 2 ("I") -> "i", 3 (empty string) -> step over one character.
+ private final static Among a_1[] = {
+ new Among ( "", -1, 3, "", methodObject ),
+ new Among ( "I", 0, 2, "", methodObject ),
+ new Among ( "Y", 0, 1, "", methodObject )
+ };
+
+ // Doubled consonants tested by r_undouble (find_among_b(a_2, 3)).
+ private final static Among a_2[] = {
+ new Among ( "dd", -1, -1, "", methodObject ),
+ new Among ( "kk", -1, -1, "", methodObject ),
+ new Among ( "tt", -1, -1, "", methodObject )
+ };
+
+ // Suffixes searched by the first step of r_standard_suffix (find_among_b(a_3, 5)).
+ // Matching the case labels there: 1 ("heden") -> replaced by "heid",
+ // 2 ("ene", "en") -> r_en_ending, 3 ("se", "s") -> conditional delete.
+ private final static Among a_3[] = {
+ new Among ( "ene", -1, 2, "", methodObject ),
+ new Among ( "se", -1, 3, "", methodObject ),
+ new Among ( "en", -1, 2, "", methodObject ),
+ new Among ( "heden", 2, 1, "", methodObject ),
+ new Among ( "s", -1, 3, "", methodObject )
+ };
+
+ // Suffixes searched by r_standard_suffix at Snowball line 127 (find_among_b(a_4, 6)).
+ // The third argument selects the case taken there: 1 ("end", "ing"), 2 ("ig"),
+ // 3 ("lijk"), 4 ("baar"), 5 ("bar").
+ private final static Among a_4[] = {
+ new Among ( "end", -1, 1, "", methodObject ),
+ new Among ( "ig", -1, 2, "", methodObject ),
+ new Among ( "ing", -1, 1, "", methodObject ),
+ new Among ( "lijk", -1, 3, "", methodObject ),
+ new Among ( "baar", -1, 4, "", methodObject ),
+ new Among ( "bar", -1, 5, "", methodObject )
+ };
+
+ // Doubled vowels tested by the last step of r_standard_suffix (find_among_b(a_5, 4)).
+ private final static Among a_5[] = {
+ new Among ( "aa", -1, -1, "", methodObject ),
+ new Among ( "ee", -1, -1, "", methodObject ),
+ new Among ( "oo", -1, -1, "", methodObject ),
+ new Among ( "uu", -1, -1, "", methodObject )
+ };
+
+ // Character-group table passed to in_grouping/out_grouping(_b) with bounds 97..232.
+ // NOTE(review): presumably a bitmap with one bit per code point counted from the
+ // lower bound; read that way it marks a, e, i, o, u, y and U+00E8 - confirm
+ // against SnowballProgram.
+ private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
+
+ // Group used with bounds 73..232 in r_standard_suffix.
+ // NOTE(review): under the same bitmap reading this is g_v plus 'I' (code point 73) - confirm.
+ private static final char g_v_I[] = {1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
+
+ // Group used with bounds 97..232 in r_standard_suffix (case 3 of the first step).
+ // NOTE(review): under the same bitmap reading this is g_v plus 'j' - confirm.
+ private static final char g_v_j[] = {17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
+
+ // Region marks: assigned in r_mark_regions, compared with the cursor in r_R2 / r_R1.
+ private int I_p2;
+ private int I_p1;
+ // Cleared at the start of r_e_ending and set once it has deleted an "e";
+ // read by case 5 of the a_4 switch in r_standard_suffix.
+ private boolean B_e_found;
+
+ /**
+  * Copies the stemmer-specific state ({@code I_p2}, {@code I_p1} and
+  * {@code B_e_found}) from another Dutch stemmer, then lets the superclass
+  * {@code copy_from} copy whatever state it owns.
+  *
+  * @param other the stemmer whose state is copied into this one
+  */
+ private void copy_from(dutchStemmer other) {
+     this.I_p2 = other.I_p2;
+     this.I_p1 = other.I_p1;
+     this.B_e_found = other.B_e_found;
+     super.copy_from(other);
+ }
+
+ /**
+ * Forward pre-processing pass (Snowball lines 41-60), made of three parts:
+ * <ol>
+ * <li>walk the input, replacing every a_0 match by its plain vowel
+ * ("a", "e", "i", "o" or "u") and stepping over everything else, then
+ * restore the cursor;</li>
+ * <li>if the literal "y" matches at the cursor, replace it by "Y";</li>
+ * <li>repeatedly search forward for a g_v character that is followed either
+ * by "i" plus another g_v character (the "i" becomes "I") or by "y"
+ * (which becomes "Y").</li>
+ * </ol>
+ * The capitalised letters are turned back into lower case by r_postlude.
+ *
+ * @return always {@code true}
+ */
+ private boolean r_prelude() {
+ int among_var;
+ int v_1;
+ int v_2;
+ int v_3;
+ int v_4;
+ int v_5;
+ int v_6;
+ // (, line 41
+ // test, line 42
+ // v_1 remembers the starting cursor; it is restored once the repeat loop ends
+ v_1 = cursor;
+ // repeat, line 42
+ replab0: while(true)
+ {
+ // v_2 is the cursor at the start of this iteration, restored if the iteration fails
+ v_2 = cursor;
+ lab1: do {
+ // (, line 42
+ // [, line 43
+ bra = cursor;
+ // substring, line 43
+ among_var = find_among(a_0, 11);
+ if (among_var == 0)
+ {
+ break lab1;
+ }
+ // ], line 43
+ ket = cursor;
+ switch(among_var) {
+ case 0:
+ break lab1;
+ case 1:
+ // (, line 45
+ // <-, line 45
+ slice_from("a");
+ break;
+ case 2:
+ // (, line 47
+ // <-, line 47
+ slice_from("e");
+ break;
+ case 3:
+ // (, line 49
+ // <-, line 49
+ slice_from("i");
+ break;
+ case 4:
+ // (, line 51
+ // <-, line 51
+ slice_from("o");
+ break;
+ case 5:
+ // (, line 53
+ // <-, line 53
+ slice_from("u");
+ break;
+ case 6:
+ // (, line 54
+ // next, line 54
+ // no accented vowel here: step over one character, or end the loop at limit
+ if (cursor >= limit)
+ {
+ break lab1;
+ }
+ cursor++;
+ break;
+ }
+ continue replab0;
+ } while (false);
+ // the iteration failed: undo its cursor movement and leave the repeat loop
+ cursor = v_2;
+ break replab0;
+ }
+ cursor = v_1;
+ // try, line 57
+ v_3 = cursor;
+ lab2: do {
+ // (, line 57
+ // [, line 57
+ bra = cursor;
+ // literal, line 57
+ if (!(eq_s(1, "y")))
+ {
+ // "try" semantics: a failed match only restores the cursor
+ cursor = v_3;
+ break lab2;
+ }
+ // ], line 57
+ ket = cursor;
+ // <-, line 57
+ slice_from("Y");
+ } while (false);
+ // repeat, line 58
+ replab3: while(true)
+ {
+ v_4 = cursor;
+ lab4: do {
+ // goto, line 58
+ // advance one character at a time until the pattern in lab6 matches at v_5
+ golab5: while(true)
+ {
+ v_5 = cursor;
+ lab6: do {
+ // (, line 58
+ if (!(in_grouping(g_v, 97, 232)))
+ {
+ break lab6;
+ }
+ // [, line 59
+ bra = cursor;
+ // or, line 59
+ lab7: do {
+ v_6 = cursor;
+ lab8: do {
+ // (, line 59
+ // literal, line 59
+ if (!(eq_s(1, "i")))
+ {
+ break lab8;
+ }
+ // ], line 59
+ ket = cursor;
+ // the "i" must itself be followed by a g_v character
+ if (!(in_grouping(g_v, 97, 232)))
+ {
+ break lab8;
+ }
+ // <-, line 59
+ slice_from("I");
+ break lab7;
+ } while (false);
+ // first alternative failed: rewind and try the "y" alternative
+ cursor = v_6;
+ // (, line 60
+ // literal, line 60
+ if (!(eq_s(1, "y")))
+ {
+ break lab6;
+ }
+ // ], line 60
+ ket = cursor;
+ // <-, line 60
+ slice_from("Y");
+ } while (false);
+ // pattern matched: "goto" leaves the cursor where the match began
+ cursor = v_5;
+ break golab5;
+ } while (false);
+ // no match at v_5: move one character forward, or give up at limit
+ cursor = v_5;
+ if (cursor >= limit)
+ {
+ break lab4;
+ }
+ cursor++;
+ }
+ continue replab3;
+ } while (false);
+ // the goto ran into limit: restore the cursor and end the repeat loop
+ cursor = v_4;
+ break replab3;
+ }
+ return true;
+ }
+
+ /**
+  * Computes the region marks {@code I_p1} and {@code I_p2} by scanning
+  * forward from the cursor (Snowball lines 64-71).
+  *
+  * <p>Both marks start out as {@code limit}. The routine then goes past a
+  * {@code g_v} character followed by a non-{@code g_v} character and records
+  * the cursor as {@code I_p1} (raised to 3 if smaller); a second, identical
+  * scan yields {@code I_p2}.
+  *
+  * @return {@code false} as soon as a scan reaches {@code limit} without
+  *         finding what it looks for (marks assigned so far are kept),
+  *         {@code true} once both marks are set
+  */
+ private boolean r_mark_regions() {
+     I_p1 = limit;
+     I_p2 = limit;
+
+     // gopast v, line 69
+     while (!in_grouping(g_v, 97, 232)) {
+         if (cursor >= limit) {
+             return false;
+         }
+         cursor++;
+     }
+     // gopast non-v, line 69
+     while (!out_grouping(g_v, 97, 232)) {
+         if (cursor >= limit) {
+             return false;
+         }
+         cursor++;
+     }
+
+     // setmark p1, line 69; try (p1 < 3, p1 = 3), line 70
+     I_p1 = (cursor < 3) ? 3 : cursor;
+
+     // gopast v, line 71
+     while (!in_grouping(g_v, 97, 232)) {
+         if (cursor >= limit) {
+             return false;
+         }
+         cursor++;
+     }
+     // gopast non-v, line 71
+     while (!out_grouping(g_v, 97, 232)) {
+         if (cursor >= limit) {
+             return false;
+         }
+         cursor++;
+     }
+
+     // setmark p2, line 71
+     I_p2 = cursor;
+     return true;
+ }
+
+ private boolean r_postlude() {
+ int among_var;
+ int v_1;
+ // repeat, line 75
+ replab0: while(true)
+ {
+ v_1 = cursor;
+ lab1: do {
+ // (, line 75
+ // [, line 77
+ bra = cursor;
+ // substring, line 77
+ among_var = find_among(a_1, 3);
+ if (among_var == 0)
+ {
+ break lab1;
+ }
+ // ], line 77
+ ket = cursor;
+ switch(among_var) {
+ case 0:
+ break lab1;
+ case 1:
+ // (, line 78
+ // <-, line 78
+ slice_from("y");
+ break;
+ case 2:
+ // (, line 79
+ // <-, line 79
+ slice_from("i");
+ break;
+ case 3:
+ // (, line 80
+ // next, line 80
+ if (cursor >= limit)
+ {
+ break lab1;
+ }
+ cursor++;
+ break;
+ }
+ continue replab0;
+ } while (false);
+ cursor = v_1;
+ break replab0;
+ }
+ return true;
+ }
+
+ /**
+  * Region test for R1.
+  *
+  * @return {@code true} if the cursor is at or beyond the {@code I_p1} mark
+  */
+ private boolean r_R1() {
+     return I_p1 <= cursor;
+ }
+
+ /**
+  * Region test for R2.
+  *
+  * @return {@code true} if the cursor is at or beyond the {@code I_p2} mark
+  */
+ private boolean r_R2() {
+     return I_p2 <= cursor;
+ }
+
+ private boolean r_undouble() {
+ int v_1;
+ // (, line 90
+ // test, line 91
+ v_1 = limit - cursor;
+ // among, line 91
+ if (find_among_b(a_2, 3) == 0)
+ {
+ return false;
+ }
+ cursor = limit - v_1;
+ // [, line 91
+ ket = cursor;
+ // next, line 91
+ if (cursor <= limit_backward)
+ {
+ return false;
+ }
+ cursor--;
+ // ], line 91
+ bra = cursor;
+ // delete, line 91
+ slice_del();
+ return true;
+ }
+
+ private boolean r_e_ending() {
+ int v_1;
+ // (, line 94
+ // unset e_found, line 95
+ B_e_found = false;
+ // [, line 96
+ ket = cursor;
+ // literal, line 96
+ if (!(eq_s_b(1, "e")))
+ {
+ return false;
+ }
+ // ], line 96
+ bra = cursor;
+ // call R1, line 96
+ if (!r_R1())
+ {
+ return false;
+ }
+ // test, line 96
+ v_1 = limit - cursor;
+ if (!(out_grouping_b(g_v, 97, 232)))
+ {
+ return false;
+ }
+ cursor = limit - v_1;
+ // delete, line 96
+ slice_del();
+ // set e_found, line 97
+ B_e_found = true;
+ // call undouble, line 98
+ if (!r_undouble())
+ {
+ return false;
+ }
+ return true;
+ }
+
+ private boolean r_en_ending() {
+ int v_1;
+ int v_2;
+ // (, line 101
+ // call R1, line 102
+ if (!r_R1())
+ {
+ return false;
+ }
+ // and, line 102
+ v_1 = limit - cursor;
+ if (!(out_grouping_b(g_v, 97, 232)))
+ {
+ return false;
+ }
+ cursor = limit - v_1;
+ // not, line 102
+ {
+ v_2 = limit - cursor;
+ lab0: do {
+ // literal, line 102
+ if (!(eq_s_b(3, "gem")))
+ {
+ break lab0;
+ }
+ return false;
+ } while (false);
+ cursor = limit - v_2;
+ }
+ // delete, line 102
+ slice_del();
+ // call undouble, line 103
+ if (!r_undouble())
+ {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Backward suffix-stripping pass (Snowball lines 106-152). It consists of
+ * five independent "do" steps; a failing step is simply abandoned and the
+ * cursor is restored (relative to limit) after every step:
+ * <ol>
+ * <li>a_3 suffix: "heden" becomes "heid" (R1), "ene"/"en" go through
+ * r_en_ending, "se"/"s" are deleted when in R1 and preceded by a
+ * character outside g_v_j;</li>
+ * <li>r_e_ending;</li>
+ * <li>"heid" is deleted when in R2 and not preceded by "c"; if "en" then
+ * precedes the cursor, r_en_ending is applied to it;</li>
+ * <li>a_4 suffix, each variant guarded by R2 (see the case comments);</li>
+ * <li>a doubled vowel from a_5 that is preceded by a character outside g_v
+ * and followed by a character outside g_v_I loses one of its two
+ * letters.</li>
+ * </ol>
+ *
+ * @return always {@code true}
+ */
+ private boolean r_standard_suffix() {
+ int among_var;
+ int v_1;
+ int v_2;
+ int v_3;
+ int v_4;
+ int v_5;
+ int v_6;
+ int v_7;
+ int v_8;
+ int v_9;
+ int v_10;
+ // (, line 106
+ // do, line 107
+ // step 1: suffixes from a_3
+ v_1 = limit - cursor;
+ lab0: do {
+ // (, line 107
+ // [, line 108
+ ket = cursor;
+ // substring, line 108
+ among_var = find_among_b(a_3, 5);
+ if (among_var == 0)
+ {
+ break lab0;
+ }
+ // ], line 108
+ bra = cursor;
+ switch(among_var) {
+ case 0:
+ break lab0;
+ case 1:
+ // "heden"
+ // (, line 110
+ // call R1, line 110
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ // <-, line 110
+ slice_from("heid");
+ break;
+ case 2:
+ // "ene", "en"
+ // (, line 113
+ // call en_ending, line 113
+ if (!r_en_ending())
+ {
+ break lab0;
+ }
+ break;
+ case 3:
+ // "se", "s"
+ // (, line 116
+ // call R1, line 116
+ if (!r_R1())
+ {
+ break lab0;
+ }
+ if (!(out_grouping_b(g_v_j, 97, 232)))
+ {
+ break lab0;
+ }
+ // delete, line 116
+ slice_del();
+ break;
+ }
+ } while (false);
+ cursor = limit - v_1;
+ // do, line 120
+ // step 2: final "e"
+ v_2 = limit - cursor;
+ lab1: do {
+ // call e_ending, line 120
+ if (!r_e_ending())
+ {
+ break lab1;
+ }
+ } while (false);
+ cursor = limit - v_2;
+ // do, line 122
+ // step 3: "heid", optionally followed (backwards) by "en"
+ v_3 = limit - cursor;
+ lab2: do {
+ // (, line 122
+ // [, line 122
+ ket = cursor;
+ // literal, line 122
+ if (!(eq_s_b(4, "heid")))
+ {
+ break lab2;
+ }
+ // ], line 122
+ bra = cursor;
+ // call R2, line 122
+ if (!r_R2())
+ {
+ break lab2;
+ }
+ // not, line 122
+ // abandon the step when "c" precedes "heid"; otherwise rewind the cursor
+ {
+ v_4 = limit - cursor;
+ lab3: do {
+ // literal, line 122
+ if (!(eq_s_b(1, "c")))
+ {
+ break lab3;
+ }
+ break lab2;
+ } while (false);
+ cursor = limit - v_4;
+ }
+ // delete, line 122
+ slice_del();
+ // [, line 123
+ ket = cursor;
+ // literal, line 123
+ if (!(eq_s_b(2, "en")))
+ {
+ break lab2;
+ }
+ // ], line 123
+ bra = cursor;
+ // call en_ending, line 123
+ if (!r_en_ending())
+ {
+ break lab2;
+ }
+ } while (false);
+ cursor = limit - v_3;
+ // do, line 126
+ // step 4: suffixes from a_4
+ v_5 = limit - cursor;
+ lab4: do {
+ // (, line 126
+ // [, line 127
+ ket = cursor;
+ // substring, line 127
+ among_var = find_among_b(a_4, 6);
+ if (among_var == 0)
+ {
+ break lab4;
+ }
+ // ], line 127
+ bra = cursor;
+ switch(among_var) {
+ case 0:
+ break lab4;
+ case 1:
+ // "end", "ing"
+ // (, line 129
+ // call R2, line 129
+ if (!r_R2())
+ {
+ break lab4;
+ }
+ // delete, line 129
+ slice_del();
+ // or, line 130
+ // either delete a preceding "ig" (in R2, not after "e") or fall back to undouble
+ lab5: do {
+ v_6 = limit - cursor;
+ lab6: do {
+ // (, line 130
+ // [, line 130
+ ket = cursor;
+ // literal, line 130
+ if (!(eq_s_b(2, "ig")))
+ {
+ break lab6;
+ }
+ // ], line 130
+ bra = cursor;
+ // call R2, line 130
+ if (!r_R2())
+ {
+ break lab6;
+ }
+ // not, line 130
+ {
+ v_7 = limit - cursor;
+ lab7: do {
+ // literal, line 130
+ if (!(eq_s_b(1, "e")))
+ {
+ break lab7;
+ }
+ break lab6;
+ } while (false);
+ cursor = limit - v_7;
+ }
+ // delete, line 130
+ slice_del();
+ break lab5;
+ } while (false);
+ cursor = limit - v_6;
+ // call undouble, line 130
+ if (!r_undouble())
+ {
+ break lab4;
+ }
+ } while (false);
+ break;
+ case 2:
+ // "ig"
+ // (, line 133
+ // call R2, line 133
+ if (!r_R2())
+ {
+ break lab4;
+ }
+ // not, line 133
+ // abandon the step when "e" precedes the suffix
+ {
+ v_8 = limit - cursor;
+ lab8: do {
+ // literal, line 133
+ if (!(eq_s_b(1, "e")))
+ {
+ break lab8;
+ }
+ break lab4;
+ } while (false);
+ cursor = limit - v_8;
+ }
+ // delete, line 133
+ slice_del();
+ break;
+ case 3:
+ // "lijk"
+ // (, line 136
+ // call R2, line 136
+ if (!r_R2())
+ {
+ break lab4;
+ }
+ // delete, line 136
+ slice_del();
+ // call e_ending, line 136
+ if (!r_e_ending())
+ {
+ break lab4;
+ }
+ break;
+ case 4:
+ // "baar"
+ // (, line 139
+ // call R2, line 139
+ if (!r_R2())
+ {
+ break lab4;
+ }
+ // delete, line 139
+ slice_del();
+ break;
+ case 5:
+ // "bar"
+ // (, line 142
+ // call R2, line 142
+ if (!r_R2())
+ {
+ break lab4;
+ }
+ // Boolean test e_found, line 142
+ // B_e_found holds whatever the most recent r_e_ending call left in it
+ if (!(B_e_found))
+ {
+ break lab4;
+ }
+ // delete, line 142
+ slice_del();
+ break;
+ }
+ } while (false);
+ cursor = limit - v_5;
+ // do, line 146
+ // step 5: shorten a doubled vowel
+ v_9 = limit - cursor;
+ lab9: do {
+ // (, line 146
+ if (!(out_grouping_b(g_v_I, 73, 232)))
+ {
+ break lab9;
+ }
+ // test, line 148
+ // v_10 marks the position in front of the character just accepted above
+ v_10 = limit - cursor;
+ // (, line 148
+ // among, line 149
+ if (find_among_b(a_5, 4) == 0)
+ {
+ break lab9;
+ }
+ if (!(out_grouping_b(g_v, 97, 232)))
+ {
+ break lab9;
+ }
+ cursor = limit - v_10;
+ // [, line 152
+ ket = cursor;
+ // next, line 152
+ if (cursor <= limit_backward)
+ {
+ break lab9;
+ }
+ cursor--;
+ // ], line 152
+ bra = cursor;
+ // delete, line 152
+ slice_del();
+ } while (false);
+ cursor = limit - v_9;
+ return true;
+ }
+
+ public boolean stem() {
+ int v_1;
+ int v_2;
+ int v_3;
+ int v_4;
+ // (, line 157
+ // do, line 159
+ v_1 = cursor;
+ lab0: do {
+ // call prelude, line 159
+ if (!r_prelude())
+ {
+ break lab0;
+ }
+ } while (false);
+ cursor = v_1;
+ // do, line 160
+ v_2 = cursor;
+ lab1: do {
+ // call mark_regions, line 160
+ if (!r_mark_regions())
+ {
+ break lab1;
+ }
+ } while (false);
+ cursor = v_2;
+ // backwards, line 161
+ limit_backward = cursor; cursor = limit;
+ // do, line 162
+ v_3 = limit - cursor;
+ lab2: do {
+ // call standard_suffix, line 162
+ if (!r_standard_suffix())
+ {
+ break lab2;
+ }
+ } while (false);
+ cursor = limit - v_3;
+ cursor = limit_backward; // do, line 163
+ v_4 = cursor;
+ lab3: do {
+ // call postlude, line 163
+ if (!r_postlude())
+ {
+ break lab3;
+ }
+ } while (false);
+ cursor = v_4;
+ return true;
+ }
+
+ /**
+  * Compares by type only: any {@code dutchStemmer} instance is considered
+  * equal to any other, and {@code null} or foreign objects are not.
+  *
+  * @param o the object to compare with
+  * @return {@code true} if {@code o} is a {@code dutchStemmer}
+  */
+ public boolean equals( Object o ) {
+     final boolean sameStemmerType = (o instanceof dutchStemmer);
+     return sameStemmerType;
+ }
+
+ /**
+  * Returns a hash derived solely from the class name, so that all
+  * {@code dutchStemmer} instances share one hash code.
+  *
+  * @return the hash code of the fully qualified class name
+  */
+ public int hashCode() {
+     final String className = dutchStemmer.class.getName();
+     return className.hashCode();
+ }
+
+
+
+}
+