You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Andrew Green <nd...@yahoo.com.mx> on 2007/04/30 21:19:18 UTC
Re: Snowball and accents filter...? (solved)
El sáb, 28-04-2007 a las 19:43 -0400, Erick Erickson escribió:
> You actually wouldn't have to maintain two versions. You could,
> instead, inject the accentless (stemmed) terms in your single
> index as synonyms (See Lucene In Action). This is easier
> to search and maintain....
>
> But it also bloats your index by some factor since you're storing two
> words for every accented word in your corpus. And gives you
> headaches if there is more than one accent in the word (do you
> then store all 4 possibilities for two accents? 8 for 3? etc?).
>
> I think your notion of running the search terms through a dictionary
> is a very good one. That way, your searcher doesn't have to care
> about all this nonsense, and assume correctly-accented characters.
>
> Erick
After meditating on this a while we decided to just stick to creating a
version of the SpanishStemmer that doesn't use accents. We've gotten it
working, and are donating the code to Lucene, in case the project has
any use for it. Ideally I think it should be merged with the current
SpanishStemmer, so that a single stemmer works both for indexes and
queries that remove accents, and those that don't.
The code is at the end of the message. The author of the modifications
is Sandra Luz Aguirre, who is an intern at the Instituto de
Investigaciones Dr. José María Luis Mora [1].
Thanks, all, for your help.
Andrew Green
[1] http://www.mora.edu.mx
---------------
SnowballAnalyzerWithoutAccents.java:
package org.apache.lucene.analysis.snowball;
import java.io.Reader;
import org.apache.lucene.analysis.ISOLatin1AccentFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
 * A {@link SnowballAnalyzer} variant that strips ISO Latin-1 accents from
 * every token before it is stemmed.
 *
 * <p>The filter chain is: {@link StandardTokenizer}, {@link StandardFilter},
 * {@link LowerCaseFilter}, an optional {@link StopFilter},
 * {@link ISOLatin1AccentFilter} and finally {@link SnowballFilter}.
 * Stop words are removed <em>before</em> accents are stripped, so the stop
 * list must contain the accented spellings as they appear in the text.
 *
 * <p>NOTE(review): this class reads the {@code name} and {@code stopSet}
 * fields of {@code SnowballAnalyzer}; it assumes they are accessible to
 * subclasses -- confirm against the SnowballAnalyzer version in use.
 */
public class SnowballAnalyzerWithoutAccents extends SnowballAnalyzer {

  /**
   * Builds the named analyzer with no stop words.
   *
   * @param name stemmer name handed to {@link SnowballFilter}
   */
  public SnowballAnalyzerWithoutAccents(String name) {
    super(name);
  }

  /**
   * Builds the named analyzer with the given stop words.
   *
   * @param name stemmer name handed to {@link SnowballFilter}
   * @param stopWords words dropped from the stream before accent removal
   */
  public SnowballAnalyzerWithoutAccents(String name, String[] stopWords) {
    super(name, stopWords);
  }

  /**
   * Builds the token stream: tokenize, normalize, lower-case, drop stop
   * words (if any were configured), strip accents, then stem.
   *
   * @param fieldName name of the field being analyzed (not used here)
   * @param reader source of the text to analyze
   * @return the fully wrapped token stream
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    if (stopSet != null) {
      result = new StopFilter(result, stopSet);
    }
    // Accents must be gone before stemming: the stemmer paired with this
    // analyzer only knows unaccented suffixes.
    result = new ISOLatin1AccentFilter(result);
    result = new SnowballFilter(result, name);
    return result;
  }
}
---------------
Spanish2Stemmer.java (based on SpanishStemmer; works as expected when
used with SnowballAnalyzerWithoutAccents):
package net.sf.snowball.ext;
import net.sf.snowball.Among;
import net.sf.snowball.SnowballProgram;
/**
 * Spanish Snowball stemmer for input whose accents have already been removed
 * (it is meant to be used behind an accent-stripping filter).
 *
 * <p>The suffix tables are the unaccented spellings of the suffixes used by
 * {@code SpanishStemmer}. Stripping the accents makes several suffixes
 * collide ("ara"/"ar&aacute;") or become suffixes of one another
 * ("era"/"iera", "eis"/"aseis"), so the tables here are de-duplicated,
 * re-sorted and re-linked. This matters because the Snowball runtime looks
 * suffixes up with a binary search: each table passed to
 * {@code find_among_b} must be ordered by comparing entries from their last
 * character backwards, and {@code substring_i} must point at the longest
 * other entry that is a proper suffix of the entry (or be -1). With an
 * unsorted table, lookups silently miss entries (for example "aron" and
 * "os" were never found).
 *
 * <p>Because the input carries no accents, the accent-removing postlude of
 * {@code SpanishStemmer} would only replace each vowel by itself; it has
 * been dropped.
 */
public class Spanish2Stemmer extends SnowballProgram {

  /** Attached (enclitic) pronouns. Only the fact of a match is used. */
  private final Among[] a_1 = {
    new Among("la", -1, -1, "", this),
    new Among("sela", 0, -1, "", this),
    new Among("le", -1, -1, "", this),
    new Among("me", -1, -1, "", this),
    new Among("se", -1, -1, "", this),
    new Among("lo", -1, -1, "", this),
    new Among("selo", 5, -1, "", this),
    new Among("las", -1, -1, "", this),
    new Among("selas", 7, -1, "", this),
    new Among("les", -1, -1, "", this),
    new Among("los", -1, -1, "", this),
    new Among("selos", 10, -1, "", this),
    new Among("nos", -1, -1, "", this)
  };

  /**
   * Verb endings a pronoun may be attached to.
   * Result 1: drop the pronoun. Result 2 ("yendo"): drop the pronoun only
   * when the ending is preceded by "u".
   */
  private final Among[] a_2 = {
    new Among("ando", -1, 1, "", this),
    new Among("iendo", -1, 1, "", this),
    new Among("yendo", -1, 2, "", this),
    new Among("ar", -1, 1, "", this),
    new Among("er", -1, 1, "", this),
    new Among("ir", -1, 1, "", this)
  };

  /** Endings removed after "-amente"; result 1 ("iv") also tries "at". */
  private final Among[] a_3 = {
    new Among("ic", -1, -1, "", this),
    new Among("ad", -1, -1, "", this),
    new Among("os", -1, -1, "", this),
    new Among("iv", -1, 1, "", this)
  };

  /** Endings removed after "-mente". */
  private final Among[] a_4 = {
    new Among("able", -1, 1, "", this),
    new Among("ible", -1, 1, "", this)
  };

  /** Endings removed after "-idad(es)". */
  private final Among[] a_5 = {
    new Among("ic", -1, 1, "", this),
    new Among("abil", -1, 1, "", this),
    new Among("iv", -1, 1, "", this)
  };

  /** Standard (derivational) suffixes; the result selects the case in r_standard_suffix. */
  private final Among[] a_6 = {
    new Among("ica", -1, 1, "", this),
    new Among("encia", -1, 5, "", this),
    new Among("logia", -1, 3, "", this),
    new Among("adora", -1, 2, "", this),
    new Among("osa", -1, 1, "", this),
    new Among("ista", -1, 1, "", this),
    new Among("iva", -1, 9, "", this),
    new Among("anza", -1, 1, "", this),
    new Among("idad", -1, 8, "", this),
    new Among("able", -1, 1, "", this),
    new Among("ible", -1, 1, "", this),
    new Among("mente", -1, 7, "", this),
    new Among("amente", 11, 6, "", this),
    new Among("acion", -1, 2, "", this),
    new Among("ucion", -1, 4, "", this),
    new Among("ico", -1, 1, "", this),
    new Among("ismo", -1, 1, "", this),
    new Among("oso", -1, 1, "", this),
    new Among("amiento", -1, 1, "", this),
    new Among("imiento", -1, 1, "", this),
    new Among("ivo", -1, 9, "", this),
    new Among("ador", -1, 2, "", this),
    new Among("icas", -1, 1, "", this),
    new Among("encias", -1, 5, "", this),
    new Among("logias", -1, 3, "", this),
    new Among("adoras", -1, 2, "", this),
    new Among("osas", -1, 1, "", this),
    new Among("istas", -1, 1, "", this),
    new Among("ivas", -1, 9, "", this),
    new Among("anzas", -1, 1, "", this),
    new Among("idades", -1, 8, "", this),
    new Among("ables", -1, 1, "", this),
    new Among("ibles", -1, 1, "", this),
    new Among("aciones", -1, 2, "", this),
    new Among("uciones", -1, 4, "", this),
    new Among("adores", -1, 2, "", this),
    new Among("icos", -1, 1, "", this),
    new Among("ismos", -1, 1, "", this),
    new Among("osos", -1, 1, "", this),
    new Among("amientos", -1, 1, "", this),
    new Among("imientos", -1, 1, "", this),
    new Among("ivos", -1, 9, "", this)
  };

  /** Verb suffixes beginning with "y"; removed only when preceded by "u". */
  private final Among[] a_7 = {
    new Among("ya", -1, 1, "", this),
    new Among("ye", -1, 1, "", this),
    new Among("yan", -1, 1, "", this),
    new Among("yen", -1, 1, "", this),
    new Among("yeron", -1, 1, "", this),
    new Among("yendo", -1, 1, "", this),
    new Among("yo", -1, 1, "", this),
    new Among("yas", -1, 1, "", this),
    new Among("yes", -1, 1, "", this),
    new Among("yais", -1, 1, "", this),
    new Among("yamos", -1, 1, "", this)
  };

  /**
   * Remaining verb suffixes. Result 1 ("en", "es", "eis", "emos"): delete,
   * taking a preceding "u" along when it follows "g". Result 2: plain delete.
   * The index in the trailing comment is the entry's position, which is what
   * the substring_i argument of longer entries refers to.
   */
  private final Among[] a_8 = {
    new Among("aba", -1, 2, "", this),      //  0
    new Among("ada", -1, 2, "", this),      //  1
    new Among("ida", -1, 2, "", this),      //  2
    new Among("ia", -1, 2, "", this),       //  3
    new Among("aria", 3, 2, "", this),      //  4
    new Among("eria", 3, 2, "", this),      //  5
    new Among("iria", 3, 2, "", this),      //  6
    new Among("ara", -1, 2, "", this),      //  7
    new Among("era", -1, 2, "", this),      //  8
    new Among("iera", 8, 2, "", this),      //  9
    new Among("ira", -1, 2, "", this),      // 10
    new Among("ad", -1, 2, "", this),       // 11
    new Among("ed", -1, 2, "", this),       // 12
    new Among("id", -1, 2, "", this),       // 13
    new Among("are", -1, 2, "", this),      // 14
    new Among("ere", -1, 2, "", this),      // 15
    new Among("ire", -1, 2, "", this),      // 16
    new Among("ase", -1, 2, "", this),      // 17
    new Among("iese", -1, 2, "", this),     // 18
    new Among("aste", -1, 2, "", this),     // 19
    new Among("iste", -1, 2, "", this),     // 20
    new Among("an", -1, 2, "", this),       // 21
    new Among("aban", 21, 2, "", this),     // 22
    new Among("ian", 21, 2, "", this),      // 23
    new Among("arian", 23, 2, "", this),    // 24
    new Among("erian", 23, 2, "", this),    // 25
    new Among("irian", 23, 2, "", this),    // 26
    new Among("aran", 21, 2, "", this),     // 27
    new Among("eran", 21, 2, "", this),     // 28
    new Among("ieran", 28, 2, "", this),    // 29
    new Among("iran", 21, 2, "", this),     // 30
    new Among("en", -1, 1, "", this),       // 31
    new Among("asen", 31, 2, "", this),     // 32
    new Among("iesen", 31, 2, "", this),    // 33
    new Among("aron", -1, 2, "", this),     // 34
    new Among("ieron", -1, 2, "", this),    // 35
    new Among("ado", -1, 2, "", this),      // 36
    new Among("ido", -1, 2, "", this),      // 37
    new Among("ando", -1, 2, "", this),     // 38
    new Among("iendo", -1, 2, "", this),    // 39
    new Among("io", -1, 2, "", this),       // 40
    new Among("ar", -1, 2, "", this),       // 41
    new Among("er", -1, 2, "", this),       // 42
    new Among("ir", -1, 2, "", this),       // 43
    new Among("as", -1, 2, "", this),       // 44
    new Among("abas", 44, 2, "", this),     // 45
    new Among("adas", 44, 2, "", this),     // 46
    new Among("idas", 44, 2, "", this),     // 47
    new Among("ias", 44, 2, "", this),      // 48
    new Among("arias", 48, 2, "", this),    // 49
    new Among("erias", 48, 2, "", this),    // 50
    new Among("irias", 48, 2, "", this),    // 51
    new Among("aras", 44, 2, "", this),     // 52
    new Among("eras", 44, 2, "", this),     // 53
    new Among("ieras", 53, 2, "", this),    // 54
    new Among("iras", 44, 2, "", this),     // 55
    new Among("es", -1, 1, "", this),       // 56
    new Among("ases", 56, 2, "", this),     // 57
    new Among("ieses", 56, 2, "", this),    // 58
    new Among("is", -1, 2, "", this),       // 59
    new Among("ais", 59, 2, "", this),      // 60
    new Among("abais", 60, 2, "", this),    // 61
    new Among("iais", 60, 2, "", this),     // 62
    new Among("ariais", 62, 2, "", this),   // 63
    new Among("eriais", 62, 2, "", this),   // 64
    new Among("iriais", 62, 2, "", this),   // 65
    new Among("arais", 60, 2, "", this),    // 66
    new Among("ierais", 60, 2, "", this),   // 67
    new Among("eis", 59, 1, "", this),      // 68
    new Among("areis", 68, 2, "", this),    // 69
    new Among("ereis", 68, 2, "", this),    // 70
    new Among("ireis", 68, 2, "", this),    // 71
    new Among("aseis", 68, 2, "", this),    // 72
    new Among("ieseis", 68, 2, "", this),   // 73
    new Among("asteis", 68, 2, "", this),   // 74
    new Among("isteis", 68, 2, "", this),   // 75
    new Among("ados", -1, 2, "", this),     // 76
    new Among("idos", -1, 2, "", this),     // 77
    new Among("amos", -1, 2, "", this),     // 78
    new Among("abamos", 78, 2, "", this),   // 79
    new Among("iamos", 78, 2, "", this),    // 80
    new Among("ariamos", 80, 2, "", this),  // 81
    new Among("eriamos", 80, 2, "", this),  // 82
    new Among("iriamos", 80, 2, "", this),  // 83
    new Among("aramos", 78, 2, "", this),   // 84
    new Among("ieramos", 78, 2, "", this),  // 85
    new Among("emos", -1, 1, "", this),     // 86
    new Among("aremos", 86, 2, "", this),   // 87
    new Among("eremos", 86, 2, "", this),   // 88
    new Among("iremos", 86, 2, "", this),   // 89
    new Among("asemos", 86, 2, "", this),   // 90
    new Among("iesemos", 86, 2, "", this),  // 91
    new Among("imos", -1, 2, "", this)      // 92
  };

  /**
   * Residual suffixes. Result 1: delete if in RV. Result 2 ("e"): delete if
   * in RV, then also drop a preceding "u" that follows "g".
   */
  private final Among[] a_9 = {
    new Among("a", -1, 1, "", this),
    new Among("e", -1, 2, "", this),
    new Among("i", -1, 1, "", this),
    new Among("o", -1, 1, "", this),
    new Among("os", -1, 1, "", this)
  };

  /**
   * Vowel bitmap for the character range 97..252, one bit per character
   * starting at 'a'. The set bits select a, e, i, o, u plus the accented
   * vowels and u-diaeresis inherited from the accented stemmer.
   */
  private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 1, 17, 4, 10 };

  /** Start of region R2, as a cursor position. */
  private int I_p2;
  /** Start of region R1, as a cursor position. */
  private int I_p1;
  /** Start of region RV, as a cursor position. */
  private int I_pV;

  /** Copies the region marks and the inherited stemmer state from {@code other}. */
  private void copy_from(Spanish2Stemmer other) {
    I_p2 = other.I_p2;
    I_p1 = other.I_p1;
    I_pV = other.I_pV;
    super.copy_from(other);
  }

  /** Tries to consume one vowel at the cursor (forward direction). */
  private boolean consumeVowel() {
    return in_grouping(g_v, 97, 252);
  }

  /** Tries to consume one non-vowel at the cursor (forward direction). */
  private boolean consumeNonVowel() {
    return out_grouping(g_v, 97, 252);
  }

  /** Moves the cursor just past the next vowel; false if the limit is reached first. */
  private boolean goPastVowel() {
    while (!consumeVowel()) {
      if (cursor >= limit) {
        return false;
      }
      cursor++;
    }
    return true;
  }

  /** Moves the cursor just past the next non-vowel; false if the limit is reached first. */
  private boolean goPastNonVowel() {
    while (!consumeNonVowel()) {
      if (cursor >= limit) {
        return false;
      }
      cursor++;
    }
    return true;
  }

  /**
   * Moves the cursor to the start of RV.
   *
   * @return false if the word has no RV; the cursor is then unspecified and
   *         the caller must restore it
   */
  private boolean locateRV() {
    final int wordStart = cursor;
    if (consumeVowel()) {
      final int afterFirst = cursor;
      // vowel, non-vowel: RV starts after the next vowel
      if (consumeNonVowel() && goPastVowel()) {
        return true;
      }
      cursor = afterFirst;
      // vowel, vowel: RV starts after the next non-vowel
      if (consumeVowel() && goPastNonVowel()) {
        return true;
      }
    }
    cursor = wordStart;
    if (!consumeNonVowel()) {
      return false;
    }
    final int afterConsonant = cursor;
    // non-vowel, non-vowel: RV starts after the next vowel
    if (consumeNonVowel() && goPastVowel()) {
      return true;
    }
    cursor = afterConsonant;
    // non-vowel, vowel: RV starts after the third character
    if (!consumeVowel() || cursor >= limit) {
      return false;
    }
    cursor++;
    return true;
  }

  /**
   * Computes the RV, R1 and R2 marks for the current word. Each mark
   * defaults to {@code limit} when its region does not exist. The cursor is
   * left where it was on entry.
   *
   * @return always true
   */
  private boolean r_mark_regions() {
    I_pV = limit;
    I_p1 = limit;
    I_p2 = limit;

    final int wordStart = cursor;
    if (locateRV()) {
      I_pV = cursor;
    }
    cursor = wordStart;

    // R1 follows the first vowel/non-vowel pair, R2 the second one.
    if (goPastVowel() && goPastNonVowel()) {
      I_p1 = cursor;
      if (goPastVowel() && goPastNonVowel()) {
        I_p2 = cursor;
      }
    }
    cursor = wordStart;
    return true;
  }

  /** True if the cursor lies inside RV. */
  private boolean r_RV() {
    return I_pV <= cursor;
  }

  /** True if the cursor lies inside R1. */
  private boolean r_R1() {
    return I_p1 <= cursor;
  }

  /** True if the cursor lies inside R2. */
  private boolean r_R2() {
    return I_p2 <= cursor;
  }

  /**
   * Backward-matches the longest entry of {@code suffixes} ending at the
   * cursor, with the search confined to RV. On a match, {@code ket} and
   * {@code bra} delimit the suffix.
   *
   * @return the matched entry's result, or 0 if the cursor is before RV or
   *         nothing matched
   */
  private int findSuffixInRV(Among[] suffixes) {
    if (cursor < I_pV) {
      return 0;
    }
    final int savedLimitBackward = limit_backward;
    limit_backward = I_pV;
    ket = cursor;
    final int result = find_among_b(suffixes, suffixes.length);
    if (result != 0) {
      bra = cursor;
    }
    limit_backward = savedLimitBackward;
    return result;
  }

  /**
   * Optional step: deletes {@code literal} if it ends at the cursor and
   * starts inside R2; otherwise restores the cursor.
   */
  private void tryDeleteLiteralInR2(String literal) {
    final int saved = limit - cursor;
    ket = cursor;
    if (eq_s_b(literal.length(), literal)) {
      bra = cursor;
      if (r_R2()) {
        slice_del();
        return;
      }
    }
    cursor = limit - saved;
  }

  /**
   * Optional step: deletes the longest entry of {@code suffixes} ending at
   * the cursor if it starts inside R2; otherwise restores the cursor.
   *
   * @return the deleted entry's result, or 0 if nothing was deleted
   */
  private int tryDeleteSuffixInR2(Among[] suffixes) {
    final int saved = limit - cursor;
    ket = cursor;
    final int result = find_among_b(suffixes, suffixes.length);
    if (result != 0) {
      bra = cursor;
      if (r_R2()) {
        slice_del();
        return result;
      }
    }
    cursor = limit - saved;
    return 0;
  }

  /**
   * Removes a pronoun attached to a gerund or infinitive, provided the verb
   * ending starts inside RV.
   *
   * @return true if a pronoun was removed
   */
  private boolean r_attached_pronoun() {
    ket = cursor;
    if (find_among_b(a_1, a_1.length) == 0) {
      return false;
    }
    bra = cursor;
    final int among_var = find_among_b(a_2, a_2.length);
    if (among_var == 0) {
      return false;
    }
    if (!r_RV()) {
      return false;
    }
    // "yendo" only counts when it follows "u".
    if (among_var == 2 && !eq_s_b(1, "u")) {
      return false;
    }
    // bra..ket still spans just the pronoun.
    slice_del();
    return true;
  }

  /**
   * Removes or rewrites a standard suffix from {@code a_6}.
   *
   * @return true if the word was changed
   */
  private boolean r_standard_suffix() {
    ket = cursor;
    final int among_var = find_among_b(a_6, a_6.length);
    if (among_var == 0) {
      return false;
    }
    bra = cursor;
    switch (among_var) {
      case 1:
        if (!r_R2()) {
          return false;
        }
        slice_del();
        break;
      case 2:
        if (!r_R2()) {
          return false;
        }
        slice_del();
        tryDeleteLiteralInR2("ic");
        break;
      case 3:
        if (!r_R2()) {
          return false;
        }
        slice_from("log");
        break;
      case 4:
        if (!r_R2()) {
          return false;
        }
        slice_from("u");
        break;
      case 5:
        if (!r_R2()) {
          return false;
        }
        slice_from("ente");
        break;
      case 6:
        // "amente" only needs to be in R1.
        if (!r_R1()) {
          return false;
        }
        slice_del();
        if (tryDeleteSuffixInR2(a_3) == 1) {
          tryDeleteLiteralInR2("at");
        }
        break;
      case 7:
        if (!r_R2()) {
          return false;
        }
        slice_del();
        tryDeleteSuffixInR2(a_4);
        break;
      case 8:
        if (!r_R2()) {
          return false;
        }
        slice_del();
        tryDeleteSuffixInR2(a_5);
        break;
      case 9:
        if (!r_R2()) {
          return false;
        }
        slice_del();
        tryDeleteLiteralInR2("at");
        break;
    }
    return true;
  }

  /**
   * Removes a "y..." verb suffix found in RV when it is preceded by "u".
   *
   * @return true if a suffix was removed
   */
  private boolean r_y_verb_suffix() {
    if (findSuffixInRV(a_7) == 0) {
      return false;
    }
    if (!eq_s_b(1, "u")) {
      return false;
    }
    slice_del();
    return true;
  }

  /**
   * Removes a verb suffix from {@code a_8} found in RV.
   *
   * @return true if a suffix was removed
   */
  private boolean r_verb_suffix() {
    final int among_var = findSuffixInRV(a_8);
    if (among_var == 0) {
      return false;
    }
    if (among_var == 1) {
      // If the suffix follows "gu", extend the deletion over the "u".
      final int beforeU = limit - cursor;
      boolean followsGu = false;
      if (eq_s_b(1, "u")) {
        final int afterU = limit - cursor;
        if (eq_s_b(1, "g")) {
          cursor = limit - afterU;
          followsGu = true;
        }
      }
      if (!followsGu) {
        cursor = limit - beforeU;
      }
      bra = cursor;
    }
    slice_del();
    return true;
  }

  /**
   * Removes a residual suffix from {@code a_9} that starts inside RV.
   *
   * @return true if a suffix was removed
   */
  private boolean r_residual_suffix() {
    ket = cursor;
    final int among_var = find_among_b(a_9, a_9.length);
    if (among_var == 0) {
      return false;
    }
    bra = cursor;
    if (!r_RV()) {
      return false;
    }
    slice_del();
    if (among_var == 2) {
      // After removing "e", also drop a "u" that follows "g" and is in RV.
      final int saved = limit - cursor;
      ket = cursor;
      if (eq_s_b(1, "u")) {
        bra = cursor;
        final int afterU = limit - cursor;
        if (eq_s_b(1, "g")) {
          cursor = limit - afterU;
          if (r_RV()) {
            slice_del();
            return true;
          }
        }
      }
      cursor = limit - saved;
    }
    return true;
  }

  /**
   * Stems the current word in place: marks the regions, then works backwards
   * from the end of the word removing an attached pronoun, one of the
   * standard / "y" verb / verb suffixes (first that applies), and finally a
   * residual suffix.
   *
   * @return always true
   */
  public boolean stem() {
    final int wordStart = cursor;
    r_mark_regions();
    cursor = wordStart;

    // Switch to backward mode: every step starts at the (current) word end.
    limit_backward = cursor;
    cursor = limit;

    r_attached_pronoun();
    cursor = limit;

    if (!r_standard_suffix()) {
      cursor = limit;
      if (!r_y_verb_suffix()) {
        cursor = limit;
        r_verb_suffix();
      }
    }
    cursor = limit;

    r_residual_suffix();

    // Back to forward mode with the cursor at the start of the word.
    cursor = limit_backward;
    return true;
  }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org