You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/08/13 15:53:27 UTC
svn commit: r1372423 [8/45] - in /lucene/dev/branches/LUCENE-2878: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ de...
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Mon Aug 13 13:52:46 2012
@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokena
*/
%%
-%unicode 6.0
+%unicode 6.1
%integer
%final
%public
@@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokena
%implements StandardTokenizerInterface
%function getNextToken
%char
+%buffer 4096
-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
@@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Ext
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format
-%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
+%include ASCIITLD.jflex-macro
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Mon Aug 13 13:52:46 2012
@@ -59,7 +59,7 @@ public abstract class AbstractAnalysisFa
* to inform user, that for this factory a {@link #luceneMatchVersion} is required */
protected final void assureMatchVersion() {
if (luceneMatchVersion == null) {
- throw new InitializationException("Configuration Error: Factory '" + this.getClass().getName() +
+ throw new IllegalArgumentException("Configuration Error: Factory '" + this.getClass().getName() +
"' needs a 'luceneMatchVersion' parameter");
}
}
@@ -86,7 +86,7 @@ public abstract class AbstractAnalysisFa
if (useDefault) {
return defaultVal;
}
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Integer.parseInt(s);
}
@@ -99,7 +99,7 @@ public abstract class AbstractAnalysisFa
String s = args.get(name);
if (s==null) {
if (useDefault) return defaultVal;
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Boolean.parseBoolean(s);
}
@@ -108,11 +108,11 @@ public abstract class AbstractAnalysisFa
try {
String pat = args.get(name);
if (null == pat) {
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Pattern.compile(args.get(name));
} catch (PatternSyntaxException e) {
- throw new InitializationException
+ throw new IllegalArgumentException
("Configuration Error: '" + name + "' can not be parsed in " +
this.getClass().getSimpleName(), e);
}
@@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFa
words = new CharArraySet(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
- List<String> wlist = loader.getLines(file.trim());
+ List<String> wlist = getLines(loader, file.trim());
words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
ignoreCase));
}
}
return words;
}
+
+ protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+ return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+ }
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
* except the input is in snowball format. */
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Mon Aug 13 13:52:46 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
*/
import java.io.Reader;
+import java.util.Set;
import org.apache.lucene.analysis.CharFilter;
@@ -27,5 +28,39 @@ import org.apache.lucene.analysis.CharFi
*/
public abstract class CharFilterFactory extends AbstractAnalysisFactory {
- public abstract CharFilter create(Reader input);
+ private static final AnalysisSPILoader<CharFilterFactory> loader =
+ new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
+
+ /** looks up a charfilter by name from context classpath */
+ public static CharFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a charfilter class by name from context classpath */
+ public static Class<? extends CharFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available charfilter names */
+ public static Set<String> availableCharFilters() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableCharFilters()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadCharFilters(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
+ /** Wraps the given Reader with a CharFilter. */
+ public abstract Reader create(Reader input);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java Mon Aug 13 13:52:46 2012
@@ -19,29 +19,20 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.InputStream;
-import java.util.List;
/**
* Abstraction for loading resources (streams, files, and classes).
*/
public interface ResourceLoader {
+ /**
+ * Opens a named resource
+ */
public InputStream openResource(String resource) throws IOException;
/**
- * Accesses a resource by name and returns the (non comment) lines
- * containing data.
- *
- * <p>
- * A comment line is any line that starts with the character "#"
- * </p>
- *
- * @param resource
- * @return a list of non-blank non-comment lines with whitespace trimmed
- * from front and back.
- * @throws IOException
+ * Creates an instance of the class with the given name and expected type
*/
- public List<String> getLines(String resource) throws IOException;
-
- public <T> T newInstance(String cname, Class<T> expectedType, String ... subpackages);
+ // TODO: fix exception handling
+ public <T> T newInstance(String cname, Class<T> expectedType);
}
\ No newline at end of file
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Mon Aug 13 13:52:46 2012
@@ -17,6 +17,8 @@
package org.apache.lucene.analysis.util;
+import java.io.IOException;
+
/**
* Interface for a component that needs to be initialized by
* an implementation of {@link ResourceLoader}.
@@ -25,5 +27,5 @@ package org.apache.lucene.analysis.util;
*/
public interface ResourceLoaderAware {
- void inform(ResourceLoader loader);
+ void inform(ResourceLoader loader) throws IOException;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java Mon Aug 13 13:52:46 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
+import java.util.Set;
+
import org.apache.lucene.analysis.TokenStream;
/**
@@ -25,6 +27,40 @@ import org.apache.lucene.analysis.TokenS
*/
public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenFilterFactory> loader =
+ new AnalysisSPILoader<TokenFilterFactory>(TokenFilterFactory.class,
+ new String[] { "TokenFilterFactory", "FilterFactory" });
+
+ /** looks up a tokenfilter by name from context classpath */
+ public static TokenFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenfilter class by name from context classpath */
+ public static Class<? extends TokenFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenfilter names from context classpath */
+ public static Set<String> availableTokenFilters() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableTokenFilters()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadTokenFilters(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
/** Transform the specified input TokenStream */
public abstract TokenStream create(TokenStream input);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java Mon Aug 13 13:52:46 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
+import java.util.Set;
/**
* Abstract parent class for analysis factories that create {@link Tokenizer}
@@ -27,6 +28,39 @@ import java.io.Reader;
*/
public abstract class TokenizerFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenizerFactory> loader =
+ new AnalysisSPILoader<TokenizerFactory>(TokenizerFactory.class);
+
+ /** looks up a tokenizer by name from context classpath */
+ public static TokenizerFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenizer class by name from context classpath */
+ public static Class<? extends TokenizerFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenizer names from context classpath */
+ public static Set<String> availableTokenizers() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableTokenizers()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadTokenizers(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
/** Creates a TokenStream of the specified input */
public abstract Tokenizer create(Reader input);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Mon Aug 13 13:52:46 2012
@@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util;
import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
@@ -194,6 +198,47 @@ public class WordlistLoader {
return result;
}
+ /**
+ * Reads the given stream and returns the (non comment) lines containing
+ * data, decoding the stream with the given character encoding.
+ *
+ * <p>
+ * A comment line is any line that starts with the character "#"
+ * </p>
+ *
+ * @return a list of non-blank non-comment lines with whitespace trimmed
+ * @throws IOException if reading from the stream fails
+ */
+ public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+ BufferedReader input = null;
+ ArrayList<String> lines;
+ boolean success = false;
+ try {
+ input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+ lines = new ArrayList<String>();
+ for (String word=null; (word=input.readLine())!=null;) {
+ // skip initial bom marker
+ if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+ word = word.substring(1);
+ // skip comments
+ if (word.startsWith("#")) continue;
+ word=word.trim();
+ // skip blank lines
+ if (word.length()==0) continue;
+ lines.add(word);
+ }
+ success = true;
+ return lines;
+ } finally {
+ if (success) {
+ IOUtils.close(input);
+ } else {
+ IOUtils.closeWhileHandlingException(input);
+ }
+ }
+ }
+
private static BufferedReader getBufferedReader(Reader reader) {
return (reader instanceof BufferedReader) ? (BufferedReader) reader
: new BufferedReader(reader);
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Mon Aug 13 13:52:46 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.wikipedia;
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 08.07.12 17:00 from the specification file
- * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
- private static final int ZZ_BUFFERSIZE = 16384;
+ private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int THREE_SINGLE_QUOTES_STATE = 10;
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex Mon Aug 13 13:52:46 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokena
%function getNextToken
%pack
%char
+%buffer 4096
%{
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java?rev=1372423&r1=1372422&r2=1372423&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java Mon Aug 13 13:52:46 2012
@@ -1,423 +1,439 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
-import org.tartarus.snowball.SnowballProgram;
+
import org.tartarus.snowball.Among;
+import org.tartarus.snowball.SnowballProgram;
+
+ /**
+ * This class was automatically generated by a Snowball to Java compiler
+ * It implements the stemming algorithm defined by a snowball script.
+ */
-/**
- * Generated class implementing code defined by a snowball script.
- */
public class DanishStemmer extends SnowballProgram {
- private Among a_0[] = {
- new Among ( "hed", -1, 1, "", this),
- new Among ( "ethed", 0, 1, "", this),
- new Among ( "ered", -1, 1, "", this),
- new Among ( "e", -1, 1, "", this),
- new Among ( "erede", 3, 1, "", this),
- new Among ( "ende", 3, 1, "", this),
- new Among ( "erende", 5, 1, "", this),
- new Among ( "ene", 3, 1, "", this),
- new Among ( "erne", 3, 1, "", this),
- new Among ( "ere", 3, 1, "", this),
- new Among ( "en", -1, 1, "", this),
- new Among ( "heden", 10, 1, "", this),
- new Among ( "eren", 10, 1, "", this),
- new Among ( "er", -1, 1, "", this),
- new Among ( "heder", 13, 1, "", this),
- new Among ( "erer", 13, 1, "", this),
- new Among ( "s", -1, 2, "", this),
- new Among ( "heds", 16, 1, "", this),
- new Among ( "es", 16, 1, "", this),
- new Among ( "endes", 18, 1, "", this),
- new Among ( "erendes", 19, 1, "", this),
- new Among ( "enes", 18, 1, "", this),
- new Among ( "ernes", 18, 1, "", this),
- new Among ( "eres", 18, 1, "", this),
- new Among ( "ens", 16, 1, "", this),
- new Among ( "hedens", 24, 1, "", this),
- new Among ( "erens", 24, 1, "", this),
- new Among ( "ers", 16, 1, "", this),
- new Among ( "ets", 16, 1, "", this),
- new Among ( "erets", 28, 1, "", this),
- new Among ( "et", -1, 1, "", this),
- new Among ( "eret", 30, 1, "", this)
- };
-
- private Among a_1[] = {
- new Among ( "gd", -1, -1, "", this),
- new Among ( "dt", -1, -1, "", this),
- new Among ( "gt", -1, -1, "", this),
- new Among ( "kt", -1, -1, "", this)
- };
-
- private Among a_2[] = {
- new Among ( "ig", -1, 1, "", this),
- new Among ( "lig", 0, 1, "", this),
- new Among ( "elig", 1, 1, "", this),
- new Among ( "els", -1, 1, "", this),
- new Among ( "l\u00F8st", -1, 2, "", this)
- };
+private static final long serialVersionUID = 1L;
- private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
+ private final static DanishStemmer methodObject = new DanishStemmer ();
- private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
+ private final static Among a_0[] = {
+ new Among ( "hed", -1, 1, "", methodObject ),
+ new Among ( "ethed", 0, 1, "", methodObject ),
+ new Among ( "ered", -1, 1, "", methodObject ),
+ new Among ( "e", -1, 1, "", methodObject ),
+ new Among ( "erede", 3, 1, "", methodObject ),
+ new Among ( "ende", 3, 1, "", methodObject ),
+ new Among ( "erende", 5, 1, "", methodObject ),
+ new Among ( "ene", 3, 1, "", methodObject ),
+ new Among ( "erne", 3, 1, "", methodObject ),
+ new Among ( "ere", 3, 1, "", methodObject ),
+ new Among ( "en", -1, 1, "", methodObject ),
+ new Among ( "heden", 10, 1, "", methodObject ),
+ new Among ( "eren", 10, 1, "", methodObject ),
+ new Among ( "er", -1, 1, "", methodObject ),
+ new Among ( "heder", 13, 1, "", methodObject ),
+ new Among ( "erer", 13, 1, "", methodObject ),
+ new Among ( "s", -1, 2, "", methodObject ),
+ new Among ( "heds", 16, 1, "", methodObject ),
+ new Among ( "es", 16, 1, "", methodObject ),
+ new Among ( "endes", 18, 1, "", methodObject ),
+ new Among ( "erendes", 19, 1, "", methodObject ),
+ new Among ( "enes", 18, 1, "", methodObject ),
+ new Among ( "ernes", 18, 1, "", methodObject ),
+ new Among ( "eres", 18, 1, "", methodObject ),
+ new Among ( "ens", 16, 1, "", methodObject ),
+ new Among ( "hedens", 24, 1, "", methodObject ),
+ new Among ( "erens", 24, 1, "", methodObject ),
+ new Among ( "ers", 16, 1, "", methodObject ),
+ new Among ( "ets", 16, 1, "", methodObject ),
+ new Among ( "erets", 28, 1, "", methodObject ),
+ new Among ( "et", -1, 1, "", methodObject ),
+ new Among ( "eret", 30, 1, "", methodObject )
+ };
+
+ private final static Among a_1[] = {
+ new Among ( "gd", -1, -1, "", methodObject ),
+ new Among ( "dt", -1, -1, "", methodObject ),
+ new Among ( "gt", -1, -1, "", methodObject ),
+ new Among ( "kt", -1, -1, "", methodObject )
+ };
+
+ private final static Among a_2[] = {
+ new Among ( "ig", -1, 1, "", methodObject ),
+ new Among ( "lig", 0, 1, "", methodObject ),
+ new Among ( "elig", 1, 1, "", methodObject ),
+ new Among ( "els", -1, 1, "", methodObject ),
+ new Among ( "l\u00F8st", -1, 2, "", methodObject )
+ };
+
+ private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
+
+ private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private int I_x;
private int I_p1;
- private StringBuilder S_ch = new StringBuilder();
+ private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
- private void copy_from(DanishStemmer other) {
- I_x = other.I_x;
- I_p1 = other.I_p1;
- S_ch = other.S_ch;
- super.copy_from(other);
- }
+ private void copy_from(DanishStemmer other) {
+ I_x = other.I_x;
+ I_p1 = other.I_p1;
+ S_ch = other.S_ch;
+ super.copy_from(other);
+ }
- private boolean r_mark_regions() {
+ private boolean r_mark_regions() {
int v_1;
int v_2;
- // (, line 29
- I_p1 = limit;
- // test, line 33
- v_1 = cursor;
- // (, line 33
- // hop, line 33
- {
- int c = cursor + 3;
- if (0 > c || c > limit)
- {
- return false;
- }
- cursor = c;
- }
- // setmark x, line 33
- I_x = cursor;
- cursor = v_1;
- // goto, line 34
- golab0: while(true)
- {
- v_2 = cursor;
- lab1: do {
- if (!(in_grouping(g_v, 97, 248)))
- {
- break lab1;
- }
- cursor = v_2;
- break golab0;
- } while (false);
- cursor = v_2;
- if (cursor >= limit)
- {
- return false;
- }
- cursor++;
- }
- // gopast, line 34
- golab2: while(true)
- {
- lab3: do {
- if (!(out_grouping(g_v, 97, 248)))
- {
- break lab3;
- }
- break golab2;
- } while (false);
- if (cursor >= limit)
- {
- return false;
- }
- cursor++;
- }
- // setmark p1, line 34
- I_p1 = cursor;
- // try, line 35
- lab4: do {
- // (, line 35
- if (!(I_p1 < I_x))
- {
- break lab4;
+ // (, line 29
+ I_p1 = limit;
+ // test, line 33
+ v_1 = cursor;
+ // (, line 33
+ // hop, line 33
+ {
+ int c = cursor + 3;
+ if (0 > c || c > limit)
+ {
+ return false;
+ }
+ cursor = c;
+ }
+ // setmark x, line 33
+ I_x = cursor;
+ cursor = v_1;
+ // goto, line 34
+ golab0: while(true)
+ {
+ v_2 = cursor;
+ lab1: do {
+ if (!(in_grouping(g_v, 97, 248)))
+ {
+ break lab1;
+ }
+ cursor = v_2;
+ break golab0;
+ } while (false);
+ cursor = v_2;
+ if (cursor >= limit)
+ {
+ return false;
+ }
+ cursor++;
+ }
+ // gopast, line 34
+ golab2: while(true)
+ {
+ lab3: do {
+ if (!(out_grouping(g_v, 97, 248)))
+ {
+ break lab3;
+ }
+ break golab2;
+ } while (false);
+ if (cursor >= limit)
+ {
+ return false;
+ }
+ cursor++;
+ }
+ // setmark p1, line 34
+ I_p1 = cursor;
+ // try, line 35
+ lab4: do {
+ // (, line 35
+ if (!(I_p1 < I_x))
+ {
+ break lab4;
+ }
+ I_p1 = I_x;
+ } while (false);
+ return true;
}
- I_p1 = I_x;
- } while (false);
- return true;
- }
- private boolean r_main_suffix() {
+ private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
- // (, line 40
- // setlimit, line 41
- v_1 = limit - cursor;
- // tomark, line 41
- if (cursor < I_p1)
- {
- return false;
- }
- cursor = I_p1;
- v_2 = limit_backward;
- limit_backward = cursor;
- cursor = limit - v_1;
- // (, line 41
- // [, line 41
- ket = cursor;
- // substring, line 41
- among_var = find_among_b(a_0, 32);
- if (among_var == 0)
- {
- limit_backward = v_2;
- return false;
- }
- // ], line 41
- bra = cursor;
- limit_backward = v_2;
- switch(among_var) {
- case 0:
- return false;
- case 1:
- // (, line 48
- // delete, line 48
- slice_del();
- break;
- case 2:
- // (, line 50
- if (!(in_grouping_b(g_s_ending, 97, 229)))
+ // (, line 40
+ // setlimit, line 41
+ v_1 = limit - cursor;
+ // tomark, line 41
+ if (cursor < I_p1)
{
return false;
}
- // delete, line 50
- slice_del();
- break;
- }
- return true;
- }
+ cursor = I_p1;
+ v_2 = limit_backward;
+ limit_backward = cursor;
+ cursor = limit - v_1;
+ // (, line 41
+ // [, line 41
+ ket = cursor;
+ // substring, line 41
+ among_var = find_among_b(a_0, 32);
+ if (among_var == 0)
+ {
+ limit_backward = v_2;
+ return false;
+ }
+ // ], line 41
+ bra = cursor;
+ limit_backward = v_2;
+ switch(among_var) {
+ case 0:
+ return false;
+ case 1:
+ // (, line 48
+ // delete, line 48
+ slice_del();
+ break;
+ case 2:
+ // (, line 50
+ if (!(in_grouping_b(g_s_ending, 97, 229)))
+ {
+ return false;
+ }
+ // delete, line 50
+ slice_del();
+ break;
+ }
+ return true;
+ }
- private boolean r_consonant_pair() {
+ private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
- // (, line 54
- // test, line 55
- v_1 = limit - cursor;
- // (, line 55
- // setlimit, line 56
- v_2 = limit - cursor;
- // tomark, line 56
- if (cursor < I_p1)
- {
- return false;
- }
- cursor = I_p1;
- v_3 = limit_backward;
- limit_backward = cursor;
- cursor = limit - v_2;
- // (, line 56
- // [, line 56
- ket = cursor;
- // substring, line 56
- if (find_among_b(a_1, 4) == 0)
- {
- limit_backward = v_3;
- return false;
- }
- // ], line 56
- bra = cursor;
- limit_backward = v_3;
- cursor = limit - v_1;
- // next, line 62
- if (cursor <= limit_backward)
- {
- return false;
- }
- cursor--;
- // ], line 62
- bra = cursor;
- // delete, line 62
- slice_del();
- return true;
- }
+ // (, line 54
+ // test, line 55
+ v_1 = limit - cursor;
+ // (, line 55
+ // setlimit, line 56
+ v_2 = limit - cursor;
+ // tomark, line 56
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_3 = limit_backward;
+ limit_backward = cursor;
+ cursor = limit - v_2;
+ // (, line 56
+ // [, line 56
+ ket = cursor;
+ // substring, line 56
+ if (find_among_b(a_1, 4) == 0)
+ {
+ limit_backward = v_3;
+ return false;
+ }
+ // ], line 56
+ bra = cursor;
+ limit_backward = v_3;
+ cursor = limit - v_1;
+ // next, line 62
+ if (cursor <= limit_backward)
+ {
+ return false;
+ }
+ cursor--;
+ // ], line 62
+ bra = cursor;
+ // delete, line 62
+ slice_del();
+ return true;
+ }
- private boolean r_other_suffix() {
+ private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
- // (, line 65
- // do, line 66
- v_1 = limit - cursor;
- lab0: do {
- // (, line 66
- // [, line 66
- ket = cursor;
- // literal, line 66
- if (!(eq_s_b(2, "st")))
- {
- break lab0;
- }
- // ], line 66
- bra = cursor;
- // literal, line 66
- if (!(eq_s_b(2, "ig")))
- {
- break lab0;
- }
- // delete, line 66
- slice_del();
- } while (false);
- cursor = limit - v_1;
- // setlimit, line 67
- v_2 = limit - cursor;
- // tomark, line 67
- if (cursor < I_p1)
- {
- return false;
- }
- cursor = I_p1;
- v_3 = limit_backward;
- limit_backward = cursor;
- cursor = limit - v_2;
- // (, line 67
- // [, line 67
- ket = cursor;
- // substring, line 67
- among_var = find_among_b(a_2, 5);
- if (among_var == 0)
- {
- limit_backward = v_3;
- return false;
- }
- // ], line 67
- bra = cursor;
- limit_backward = v_3;
- switch(among_var) {
- case 0:
- return false;
- case 1:
- // (, line 70
- // delete, line 70
- slice_del();
- // do, line 70
- v_4 = limit - cursor;
- lab1: do {
- // call consonant_pair, line 70
- if (!r_consonant_pair())
+ // (, line 65
+ // do, line 66
+ v_1 = limit - cursor;
+ lab0: do {
+ // (, line 66
+ // [, line 66
+ ket = cursor;
+ // literal, line 66
+ if (!(eq_s_b(2, "st")))
{
- break lab1;
+ break lab0;
}
+ // ], line 66
+ bra = cursor;
+ // literal, line 66
+ if (!(eq_s_b(2, "ig")))
+ {
+ break lab0;
+ }
+ // delete, line 66
+ slice_del();
} while (false);
- cursor = limit - v_4;
- break;
- case 2:
- // (, line 72
- // <-, line 72
- slice_from("l\u00F8s");
- break;
- }
- return true;
- }
+ cursor = limit - v_1;
+ // setlimit, line 67
+ v_2 = limit - cursor;
+ // tomark, line 67
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_3 = limit_backward;
+ limit_backward = cursor;
+ cursor = limit - v_2;
+ // (, line 67
+ // [, line 67
+ ket = cursor;
+ // substring, line 67
+ among_var = find_among_b(a_2, 5);
+ if (among_var == 0)
+ {
+ limit_backward = v_3;
+ return false;
+ }
+ // ], line 67
+ bra = cursor;
+ limit_backward = v_3;
+ switch(among_var) {
+ case 0:
+ return false;
+ case 1:
+ // (, line 70
+ // delete, line 70
+ slice_del();
+ // do, line 70
+ v_4 = limit - cursor;
+ lab1: do {
+ // call consonant_pair, line 70
+ if (!r_consonant_pair())
+ {
+ break lab1;
+ }
+ } while (false);
+ cursor = limit - v_4;
+ break;
+ case 2:
+ // (, line 72
+ // <-, line 72
+ slice_from("l\u00F8s");
+ break;
+ }
+ return true;
+ }
- private boolean r_undouble() {
+ private boolean r_undouble() {
int v_1;
int v_2;
- // (, line 75
- // setlimit, line 76
- v_1 = limit - cursor;
- // tomark, line 76
- if (cursor < I_p1)
- {
- return false;
- }
- cursor = I_p1;
- v_2 = limit_backward;
- limit_backward = cursor;
- cursor = limit - v_1;
- // (, line 76
- // [, line 76
- ket = cursor;
- if (!(out_grouping_b(g_v, 97, 248)))
- {
- limit_backward = v_2;
- return false;
- }
- // ], line 76
- bra = cursor;
- // -> ch, line 76
- S_ch = slice_to(S_ch);
- limit_backward = v_2;
- // name ch, line 77
- if (!(eq_v_b(S_ch)))
- {
- return false;
- }
- // delete, line 78
- slice_del();
- return true;
- }
+ // (, line 75
+ // setlimit, line 76
+ v_1 = limit - cursor;
+ // tomark, line 76
+ if (cursor < I_p1)
+ {
+ return false;
+ }
+ cursor = I_p1;
+ v_2 = limit_backward;
+ limit_backward = cursor;
+ cursor = limit - v_1;
+ // (, line 76
+ // [, line 76
+ ket = cursor;
+ if (!(out_grouping_b(g_v, 97, 248)))
+ {
+ limit_backward = v_2;
+ return false;
+ }
+ // ], line 76
+ bra = cursor;
+ // -> ch, line 76
+ S_ch = slice_to(S_ch);
+ limit_backward = v_2;
+ // name ch, line 77
+ if (!(eq_v_b(S_ch)))
+ {
+ return false;
+ }
+ // delete, line 78
+ slice_del();
+ return true;
+ }
- public boolean stem() {
+ public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
- // (, line 82
- // do, line 84
- v_1 = cursor;
- lab0: do {
- // call mark_regions, line 84
- if (!r_mark_regions())
- {
- break lab0;
- }
- } while (false);
- cursor = v_1;
- // backwards, line 85
- limit_backward = cursor; cursor = limit;
- // (, line 85
- // do, line 86
- v_2 = limit - cursor;
- lab1: do {
- // call main_suffix, line 86
- if (!r_main_suffix())
- {
- break lab1;
- }
- } while (false);
- cursor = limit - v_2;
- // do, line 87
- v_3 = limit - cursor;
- lab2: do {
- // call consonant_pair, line 87
- if (!r_consonant_pair())
- {
- break lab2;
- }
- } while (false);
- cursor = limit - v_3;
- // do, line 88
- v_4 = limit - cursor;
- lab3: do {
- // call other_suffix, line 88
- if (!r_other_suffix())
- {
- break lab3;
- }
- } while (false);
- cursor = limit - v_4;
- // do, line 89
- v_5 = limit - cursor;
- lab4: do {
- // call undouble, line 89
- if (!r_undouble())
- {
- break lab4;
+ // (, line 82
+ // do, line 84
+ v_1 = cursor;
+ lab0: do {
+ // call mark_regions, line 84
+ if (!r_mark_regions())
+ {
+ break lab0;
+ }
+ } while (false);
+ cursor = v_1;
+ // backwards, line 85
+ limit_backward = cursor; cursor = limit;
+ // (, line 85
+ // do, line 86
+ v_2 = limit - cursor;
+ lab1: do {
+ // call main_suffix, line 86
+ if (!r_main_suffix())
+ {
+ break lab1;
+ }
+ } while (false);
+ cursor = limit - v_2;
+ // do, line 87
+ v_3 = limit - cursor;
+ lab2: do {
+ // call consonant_pair, line 87
+ if (!r_consonant_pair())
+ {
+ break lab2;
+ }
+ } while (false);
+ cursor = limit - v_3;
+ // do, line 88
+ v_4 = limit - cursor;
+ lab3: do {
+ // call other_suffix, line 88
+ if (!r_other_suffix())
+ {
+ break lab3;
+ }
+ } while (false);
+ cursor = limit - v_4;
+ // do, line 89
+ v_5 = limit - cursor;
+ lab4: do {
+ // call undouble, line 89
+ if (!r_undouble())
+ {
+ break lab4;
+ }
+ } while (false);
+ cursor = limit - v_5;
+ cursor = limit_backward; return true;
}
- } while (false);
- cursor = limit - v_5;
- cursor = limit_backward; return true;
+
+ public boolean equals( Object o ) {
+ return o instanceof DanishStemmer;
}
-}
+ public int hashCode() {
+ return DanishStemmer.class.getName().hashCode();
+ }
+
+
+}