You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/11/10 17:02:41 UTC
[lucy-commits] svn commit: r1033549 [1/9] - in /incubator/lucy/trunk: ./
core/Lucy/Analysis/ devel/conf/ modules/ modules/analysis/
modules/analysis/snowstem/ modules/analysis/snowstem/devel/
modules/analysis/snowstem/source/
modules/analysis/snowstem/source/include/...
Author: marvin
Date: Wed Nov 10 16:02:40 2010
New Revision: 1033549
URL: http://svn.apache.org/viewvc?rev=1033549&view=rev
Log:
LUCY-125
Bundle Snowball stemming libraries with Lucy, eliminating dependency on CPAN
module Lingua::Stem::Snowball.
Added:
incubator/lucy/trunk/modules/
incubator/lucy/trunk/modules/analysis/
incubator/lucy/trunk/modules/analysis/snowstem/
incubator/lucy/trunk/modules/analysis/snowstem/devel/
incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl
incubator/lucy/trunk/modules/analysis/snowstem/source/
incubator/lucy/trunk/modules/analysis/snowstem/source/include/
incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h
incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/
incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c
incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h
incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/
incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c
incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h
incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h
incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_english.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_english.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_finnish.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_finnish.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_french.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_french.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_german.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_german.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_hungarian.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_hungarian.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_italian.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_italian.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_norwegian.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_norwegian.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_porter.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_porter.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_portuguese.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_portuguese.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_romanian.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_romanian.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_russian.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_russian.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_spanish.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_spanish.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_swedish.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_swedish.h
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_turkish.c
incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_turkish.h
Removed:
incubator/lucy/trunk/perl/xs/Lucy/Analysis/Stemmer.c
Modified:
incubator/lucy/trunk/LICENSE
incubator/lucy/trunk/NOTICE
incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c
incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh
incubator/lucy/trunk/devel/conf/lucyperl.supp
incubator/lucy/trunk/perl/Build.PL
incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm
incubator/lucy/trunk/perl/lib/Lucy.pm
incubator/lucy/trunk/perl/lib/Lucy/Analysis/Stemmer.pm
Modified: incubator/lucy/trunk/LICENSE
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/LICENSE?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/LICENSE (original)
+++ incubator/lucy/trunk/LICENSE Wed Nov 10 16:02:40 2010
@@ -298,4 +298,34 @@ the license for those materials:
registered in some jurisdictions. All other trademarks and registered
trademarks mentioned herein are the property of their respective owners.
+Portions of the Snowball stemming library are bundled with this distribution
+under modules/analysis/snowstem. Here is the license for those materials:
+
+ Copyright (c) 2001, Dr Martin Porter
+ Copyright (c) 2002, Richard Boulton
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holders nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
Modified: incubator/lucy/trunk/NOTICE
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/NOTICE?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/NOTICE (original)
+++ incubator/lucy/trunk/NOTICE Wed Nov 10 16:02:40 2010
@@ -10,3 +10,7 @@ International Business Machines Corporat
This software contains code derived from Unicode data available from
<http://www.unicode.org/Public/> and Copyright 1991-2010 Unicode, Inc.
+This software bundles code developed by the Snowball project at
+<http://snowball.tartarus.org>, Copyright (c) 2001, Dr Martin Porter and
+Copyright (c) 2002, Richard Boulton.
+
Modified: incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c Wed Nov 10 16:02:40 2010
@@ -23,10 +23,7 @@
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
-Stemmer_sb_stemmer_new_t Stemmer_sb_stemmer_new = NULL;
-Stemmer_sb_stemmer_delete_t Stemmer_sb_stemmer_delete = NULL;
-Stemmer_sb_stemmer_stem_t Stemmer_sb_stemmer_stem = NULL;
-Stemmer_sb_stemmer_length_t Stemmer_sb_stemmer_length = NULL;
+#include "libstemmer.h"
Stemmer*
Stemmer_new(const CharBuf *language)
@@ -43,11 +40,10 @@ Stemmer_init(Stemmer *self, const CharBu
self->language = CB_Clone(language);
// Get a Snowball stemmer. Be case-insensitive.
- Stemmer_load_snowball();
lang_buf[0] = tolower(CB_Code_Point_At(language, 0));
lang_buf[1] = tolower(CB_Code_Point_At(language, 1));
lang_buf[2] = '\0';
- self->snowstemmer = lucy_Stemmer_sb_stemmer_new(lang_buf, "UTF_8");
+ self->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
if (!self->snowstemmer)
THROW(ERR, "Can't find a Snowball stemmer for %o", language);
@@ -58,7 +54,7 @@ void
Stemmer_destroy(Stemmer *self)
{
if (self->snowstemmer) {
- lucy_Stemmer_sb_stemmer_delete((struct sb_stemmer*)self->snowstemmer);
+ sb_stemmer_delete((struct sb_stemmer*)self->snowstemmer);
}
DECREF(self->language);
SUPER_DESTROY(self, STEMMER);
@@ -72,9 +68,9 @@ Stemmer_transform(Stemmer *self, Inversi
= (struct sb_stemmer*)self->snowstemmer;
while (NULL != (token = Inversion_Next(inversion))) {
- sb_symbol *stemmed_text = lucy_Stemmer_sb_stemmer_stem(snowstemmer,
+ const sb_symbol *stemmed_text = sb_stemmer_stem(snowstemmer,
(sb_symbol*)token->text, token->len);
- size_t len = lucy_Stemmer_sb_stemmer_length(snowstemmer);
+ size_t len = sb_stemmer_length(snowstemmer);
if (len > token->len) {
FREEMEM(token->text);
token->text = (char*)MALLOCATE(len + 1);
Modified: incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh Wed Nov 10 16:02:40 2010
@@ -16,35 +16,6 @@
parcel Lucy;
-__C__
-typedef unsigned char sb_symbol;
-struct sb_stemmer;
-
-typedef struct sb_stemmer*
-(*lucy_Stemmer_sb_stemmer_new_t)(const char *algorithm, const char *encoding);
-typedef void
-(*lucy_Stemmer_sb_stemmer_delete_t)(struct sb_stemmer *snowstemmer);
-typedef sb_symbol*
-(*lucy_Stemmer_sb_stemmer_stem_t)(struct sb_stemmer *snowstemmer,
- const sb_symbol *text, int len);
-typedef int
-(*lucy_Stemmer_sb_stemmer_length_t)(struct sb_stemmer *snowstemmer);
-extern lucy_Stemmer_sb_stemmer_new_t lucy_Stemmer_sb_stemmer_new;
-extern lucy_Stemmer_sb_stemmer_delete_t lucy_Stemmer_sb_stemmer_delete;
-extern lucy_Stemmer_sb_stemmer_stem_t lucy_Stemmer_sb_stemmer_stem;
-extern lucy_Stemmer_sb_stemmer_length_t lucy_Stemmer_sb_stemmer_length;
-#ifdef LUCY_USE_SHORT_NAMES
- #define Stemmer_sb_stemmer_new_t lucy_Stemmer_sb_stemmer_new_t
- #define Stemmer_sb_stemmer_delete_t lucy_Stemmer_sb_stemmer_delete_t
- #define Stemmer_sb_stemmer_stem_t lucy_Stemmer_sb_stemmer_stem_t
- #define Stemmer_sb_stemmer_length_t lucy_Stemmer_sb_stemmer_length_t
- #define Stemmer_sb_stemmer_new lucy_Stemmer_sb_stemmer_new
- #define Stemmer_sb_stemmer_delete lucy_Stemmer_sb_stemmer_delete
- #define Stemmer_sb_stemmer_stem lucy_Stemmer_sb_stemmer_stem
- #define Stemmer_sb_stemmer_length lucy_Stemmer_sb_stemmer_length
-#endif
-__END_C__
-
/** Reduce related words to a shared root.
*
* Stemmer is an L<Analyzer|Lucy::Analysis::Analyzer> which reduces
@@ -82,11 +53,6 @@ class Lucy::Analysis::Stemmer inherits L
public bool_t
Equals(Stemmer *self, Obj *other);
- /** Load the Snowball stemming library. Called by the constructor.
- */
- inert void
- load_snowball();
-
public void
Destroy(Stemmer *self);
}
Modified: incubator/lucy/trunk/devel/conf/lucyperl.supp
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/devel/conf/lucyperl.supp?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/devel/conf/lucyperl.supp (original)
+++ incubator/lucy/trunk/devel/conf/lucyperl.supp Wed Nov 10 16:02:40 2010
@@ -101,13 +101,4 @@
fun:*
}
-{
- <Snowball XS constructor>
- Memcheck:Leak
- fun:calloc
- fun:Perl_safesyscalloc
- fun:XS_Lingua__Stem__Snowball__Stemmifier_new
- fun:*
-}
-
Added: incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl Wed Nov 10 16:02:40 2010
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+use File::Spec::Functions qw( catfile catdir no_upwards );
+use File::Copy qw( copy );
+use Cwd qw( getcwd );
+
+# Expect exactly two arguments: the Snowball svn checkout to build from and
+# the Lucy snowstem directory that receives the refreshed sources.
+if ( @ARGV != 2 ) {
+    die "Usage: perl update_snowstem.pl SNOWBALL_SVN_CO LUCY_SNOWBALL_DIR";
+}
+
+my ( $snow_co_dir, $dest_dir ) = @ARGV;
+die("Not a directory: '$snow_co_dir'") unless -d $snow_co_dir;
+
+# Bring the checkout to revision 541.  system() returns the raw wait status;
+# test the whole value so that a child killed by a signal (status in the low
+# bits) is treated as a failure too, not only a non-zero exit code.
+my $retval = system( "svn", "update", "-r", "541", $snow_co_dir );
+die "svn update failed" if $retval != 0;
+
+# Run the dist_libstemmer_c make target from inside the checkout's snowball
+# directory, then return to the directory we started in.
+my $oldpwd         = getcwd();
+my $snow_build_dir = catdir( $snow_co_dir, 'snowball' );
+chdir($snow_build_dir) or die $!;
+$retval = system("make dist_libstemmer_c");
+die "'make dist_libstemmer_c' failed" if $retval != 0;
+chdir($oldpwd) or die $!;
+
+# Copy only UTF-8 Stemmer files.  Keep directory structure intact so that
+# compilation succeeds.
+copy_dir_contents( 'src_c', qr/UTF/ );
+copy_dir_contents('include');
+copy_dir_contents('runtime');
+# The dot is escaped so that only names ending in "utf8.c" / "utf8.h" match.
+copy_dir_contents( 'libstemmer', qr/utf8\.[ch]$/ );
+
+# Add include guard to libstemmer.h: slurp the freshly copied header, then
+# rewrite it wrapped in #ifndef/#define/#endif.
+my $libstemmer_h_path
+    = catfile( $dest_dir, qw( source include libstemmer.h ) );
+open( my $libstemmer_h_fh, '<', $libstemmer_h_path )
+    or die "Can't open '$libstemmer_h_path': $!";
+my $libstemmer_h_content = do { local $/; <$libstemmer_h_fh> };
+close $libstemmer_h_fh or die $!;
+open( $libstemmer_h_fh, '>', $libstemmer_h_path )
+    or die "Can't open '$libstemmer_h_path': $!";
+print $libstemmer_h_fh <<END_STUFF;
+#ifndef H_LIBSTEMMER
+#define H_LIBSTEMMER
+
+$libstemmer_h_content
+
+#endif /* H_LIBSTEMMER */
+
+END_STUFF
+# Close explicitly so that a failed (buffered) write is reported instead of
+# silently leaving a truncated header behind.
+close $libstemmer_h_fh or die "Can't close '$libstemmer_h_path': $!";
+
+# Copy the entries found directly inside $snow_build_dir/$dir_name into
+# $dest_dir/source/$dir_name.  When $pattern is given, only names matching it
+# are copied; names containing ".svn" are always left out.  Dies if either
+# directory is unusable or if any single copy fails.
+sub copy_dir_contents {
+    my ( $dir_name, $pattern ) = @_;
+    my $source_dir = catdir( $snow_build_dir, $dir_name );
+    my $target_dir = catdir( $dest_dir, 'source', $dir_name );
+
+    opendir( my $dir_handle, $source_dir )
+        or die "Can't opendir '$source_dir': $!";
+    -d $target_dir or die "Not a directory: '$target_dir'";
+
+    # Select the names to copy up front, then copy them one by one.
+    my @wanted = grep { $_ !~ /\.svn/ }
+        grep { !$pattern || $_ =~ $pattern }
+        no_upwards( readdir $dir_handle );
+
+    for my $name (@wanted) {
+        my $source_path = catfile( $source_dir, $name );
+        my $target_path = catfile( $target_dir, $name );
+        copy( $source_path, $target_path )
+            or die "Can't copy '$source_path' to '$target_path': $!";
+    }
+
+    closedir $dir_handle or die $!;
+}
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,86 @@
+#ifndef H_LIBSTEMMER
+#define H_LIBSTEMMER
+
+
+/* Make header file work when included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sb_stemmer;
+typedef unsigned char sb_symbol;
+
+/* FIXME - should be able to get a version number for each stemming
+ * algorithm (which will be incremented each time the output changes). */
+
+/** Returns an array of the names of the available stemming algorithms.
+ * Note that these are the canonical names - aliases (ie, other names for
+ * the same algorithm) will not be included in the list.
+ * The list is terminated with a null pointer.
+ *
+ * The list must not be modified in any way.
+ */
+const char ** sb_stemmer_list(void);
+
+/** Create a new stemmer object, using the specified algorithm, for the
+ * specified character encoding.
+ *
+ * All algorithms will usually be available in UTF-8, but may also be
+ * available in other character encodings.
+ *
+ * @param algorithm The algorithm name. This is either the english
+ * name of the algorithm, or the 2 or 3 letter ISO 639 codes for the
+ * language. Note that case is significant in this parameter - the
+ * value should be supplied in lower case.
+ *
+ * @param charenc The character encoding. NULL may be passed as
+ * this value, in which case UTF-8 encoding will be assumed. Otherwise,
+ * the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1),
+ * "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian). Note that
+ * case is significant in this parameter.
+ *
+ * @return NULL if the specified algorithm is not recognised, or the
+ * algorithm is not available for the requested encoding. Otherwise,
+ * returns a pointer to a newly created stemmer for the requested algorithm.
+ * The returned pointer must be deleted by calling sb_stemmer_delete().
+ *
+ * @note NULL will also be returned if an out of memory error occurs.
+ */
+struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);
+
+/** Delete a stemmer object.
+ *
+ * This frees all resources allocated for the stemmer. After calling
+ * this function, the supplied stemmer may no longer be used in any way.
+ *
+ * It is safe to pass a null pointer to this function - this will have
+ * no effect.
+ */
+void sb_stemmer_delete(struct sb_stemmer * stemmer);
+
+/** Stem a word.
+ *
+ * The return value is owned by the stemmer - it must not be freed or
+ * modified, and it will become invalid when the stemmer is called again,
+ * or if the stemmer is freed.
+ *
+ * The length of the return value can be obtained using sb_stemmer_length().
+ *
+ * If an out-of-memory error occurs, this will return NULL.
+ */
+const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
+ const sb_symbol * word, int size);
+
+/** Get the length of the result of the last stemmed word.
+ * This should not be called before sb_stemmer_stem() has been called.
+ */
+int sb_stemmer_length(struct sb_stemmer * stemmer);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* H_LIBSTEMMER */
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,95 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include "../include/libstemmer.h"
+#include "../runtime/api.h"
+#include "modules_utf8.h"
+
+struct sb_stemmer { /* Stemmer handle: one module's entry points plus the environment they operate on. */
+ struct SN_env * (*create)(void); /* copied from the matching modules[] entry by sb_stemmer_new() */
+ void (*close)(struct SN_env *); /* called with env by sb_stemmer_delete() */
+ int (*stem)(struct SN_env *); /* called by sb_stemmer_stem(); a negative result is treated as failure */
+
+ struct SN_env * env; /* result of create(); holds the current word in p with length l */
+};
+
+/* Return the table of canonical algorithm names.  The table ends with a null
+ * pointer and is shared static data, so callers must not modify it.
+ */
+const char **
+sb_stemmer_list(void)
+{
+    const char ** names = algorithm_names;
+
+    return names;
+}
+
+/* Map an encoding name to its stemmer_encoding_t value.
+ *
+ * A NULL name selects ENC_UTF_8.  Otherwise the name is compared exactly
+ * (case-sensitively) against each entry of encodings[]; ENC_UNKNOWN is
+ * returned when no entry matches.
+ */
+static stemmer_encoding_t
+sb_getenc(const char * charenc)
+{
+    struct stemmer_encoding * entry = encodings;
+
+    if (charenc == NULL) {
+        return ENC_UTF_8;
+    }
+
+    /* The table is terminated by an entry whose name is a null pointer. */
+    while (entry->name != NULL) {
+        if (strcmp(entry->name, charenc) == 0) {
+            return entry->enc;
+        }
+        entry++;
+    }
+    return ENC_UNKNOWN;
+}
+
+/* Create a stemmer for `algorithm` in encoding `charenc`.
+ *
+ * Returns NULL when the encoding name is unknown, when modules[] has no entry
+ * with that exact name and encoding, or when allocating the handle or its
+ * environment fails.  A non-NULL result must be released with
+ * sb_stemmer_delete().
+ */
+extern struct sb_stemmer *
+sb_stemmer_new(const char * algorithm, const char * charenc)
+{
+    const stemmer_encoding_t wanted_enc = sb_getenc(charenc);
+    struct stemmer_modules * entry;
+    struct sb_stemmer * handle;
+
+    if (wanted_enc == ENC_UNKNOWN) {
+        return NULL;
+    }
+
+    /* Linear scan; stops on the first name+encoding match or on the
+     * terminating entry, whose name is a null pointer. */
+    entry = modules;
+    while (entry->name != NULL
+           && !(strcmp(entry->name, algorithm) == 0
+                && entry->enc == wanted_enc)) {
+        entry++;
+    }
+    if (entry->name == NULL) {
+        return NULL;
+    }
+
+    handle = malloc(sizeof *handle);
+    if (handle == NULL) {
+        return NULL;
+    }
+
+    handle->create = entry->create;
+    handle->close  = entry->close;
+    handle->stem   = entry->stem;
+
+    handle->env = handle->create();
+    if (handle->env != NULL) {
+        return handle;
+    }
+
+    /* Environment creation failed: tear the half-built handle down again. */
+    sb_stemmer_delete(handle);
+    return NULL;
+}
+
+/* Destroy a stemmer created by sb_stemmer_new().
+ *
+ * Passing NULL is a no-op.  The environment is released through the module's
+ * close hook when one is set; the handle itself is always freed, so a handle
+ * without a close hook is no longer leaked.
+ */
+void
+sb_stemmer_delete(struct sb_stemmer * stemmer)
+{
+    if (stemmer == NULL) return;
+    if (stemmer->close != NULL)
+    {
+        stemmer->close(stemmer->env);
+        stemmer->close = NULL;
+    }
+    free(stemmer);
+}
+
+/* Stem the `size` symbols at `word`.
+ *
+ * On success returns the stemmer's internal buffer, terminated with a zero
+ * symbol at index env->l (the length reported by sb_stemmer_length()).
+ * Returns NULL when loading the word fails (the stored length is then reset
+ * to 0) or when the module's stem routine returns a negative value.
+ */
+const sb_symbol *
+sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
+{
+    struct SN_env * env = stemmer->env;
+
+    if (SN_set_current(env, size, (const symbol *)word) != 0) {
+        env->l = 0;
+        return NULL;
+    }
+
+    if (stemmer->stem(env) < 0) {
+        return NULL;
+    }
+
+    /* env->p is read only after stem() has run. */
+    env->p[env->l] = 0;
+    return (const sb_symbol *)env->p;
+}
+
+/* Return the length field (env->l) of the stemmer's environment, i.e. the
+ * index at which sb_stemmer_stem() placed the terminating zero symbol.
+ */
+int
+sb_stemmer_length(struct sb_stemmer * stemmer)
+{
+    const struct SN_env * env = stemmer->env;
+
+    return env->l;
+}
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,121 @@
+/* libstemmer/modules_utf8.h: List of stemming modules.
+ *
+ * This file is generated by mkmodules.pl from a list of module names.
+ * Do not edit manually.
+ *
+ * Modules included by this file are: danish, dutch, english, finnish, french,
+ * german, hungarian, italian, norwegian, porter, portuguese, romanian,
+ * russian, spanish, swedish, turkish
+ */
+
+#include "../src_c/stem_UTF_8_danish.h"
+#include "../src_c/stem_UTF_8_dutch.h"
+#include "../src_c/stem_UTF_8_english.h"
+#include "../src_c/stem_UTF_8_finnish.h"
+#include "../src_c/stem_UTF_8_french.h"
+#include "../src_c/stem_UTF_8_german.h"
+#include "../src_c/stem_UTF_8_hungarian.h"
+#include "../src_c/stem_UTF_8_italian.h"
+#include "../src_c/stem_UTF_8_norwegian.h"
+#include "../src_c/stem_UTF_8_porter.h"
+#include "../src_c/stem_UTF_8_portuguese.h"
+#include "../src_c/stem_UTF_8_romanian.h"
+#include "../src_c/stem_UTF_8_russian.h"
+#include "../src_c/stem_UTF_8_spanish.h"
+#include "../src_c/stem_UTF_8_swedish.h"
+#include "../src_c/stem_UTF_8_turkish.h"
+
+typedef enum { /* Character encodings a stemmer module can be registered for. */
+ ENC_UNKNOWN=0, /* returned by sb_getenc() for a name not found in encodings[]; also used in table terminators */
+ ENC_UTF_8
+} stemmer_encoding_t;
+
+struct stemmer_encoding { /* One row of encodings[]: an encoding name and its enum value. */
+ const char * name; /* compared with strcmp() in sb_getenc(); 0 marks the end of the table */
+ stemmer_encoding_t enc;
+};
+static struct stemmer_encoding encodings[] = { /* searched linearly by sb_getenc() */
+ {"UTF_8", ENC_UTF_8},
+ {0,ENC_UNKNOWN} /* terminator: a null name ends the search */
+};
+
+struct stemmer_modules { /* One row of modules[]: a name/encoding pair and the entry points that implement it. */
+ const char * name; /* compared with strcmp() in sb_stemmer_new(); 0 marks the end of the table */
+ stemmer_encoding_t enc;
+ struct SN_env * (*create)(void); /* the three hooks are copied into struct sb_stemmer by sb_stemmer_new() */
+ void (*close)(struct SN_env *);
+ int (*stem)(struct SN_env *);
+};
+static struct stemmer_modules modules[] = { /* scanned linearly by sb_stemmer_new(); each language is listed under several names that share one set of entry points */
+ {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+ {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+ {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+ {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+ {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+ {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+ {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+ {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+ {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+ {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+ {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+ {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+ {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+ {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+ {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+ {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+ {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+ {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+ {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+ {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+ {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+ {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+ {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+ {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+ {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+ {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+ {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+ {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+ {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+ {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+ {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+ {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+ {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+ {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
+ {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+ {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+ {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+ {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+ {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+ {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+ {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+ {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+ {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+ {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+ {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+ {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+ {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+ {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+ {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+ {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+ {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+ {0,ENC_UNKNOWN,0,0,0} /* terminator: a null name ends the scan in sb_stemmer_new() */
+};
+static const char * algorithm_names[] = { /* returned as-is by sb_stemmer_list(); full language names only, no short-code aliases */
+ "danish",
+ "dutch",
+ "english",
+ "finnish",
+ "french",
+ "german",
+ "hungarian",
+ "italian",
+ "norwegian",
+ "porter",
+ "portuguese",
+ "romanian",
+ "russian",
+ "spanish",
+ "swedish",
+ "turkish",
+ 0 /* null pointer terminates the list */
+};
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,66 @@
+
+#include <stdlib.h> /* for calloc, free */
+#include "header.h"
+
+/* Allocate a zero-initialised environment together with its buffers.
+ *
+ * S_size, I_size and B_size give the number of entries wanted in the S
+ * (string), I (int) and B (byte) arrays; an array whose size is 0 is not
+ * allocated and stays NULL.  Returns NULL if any allocation fails, after
+ * handing the partially built environment to SN_close_env().
+ */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    struct SN_env * env = calloc(1, sizeof *env);
+
+    if (env == NULL) {
+        return NULL;
+    }
+
+    env->p = create_s();
+    if (env->p == NULL) {
+        goto fail;
+    }
+
+    if (S_size != 0) {
+        int idx;
+
+        env->S = calloc(S_size, sizeof *env->S);
+        if (env->S == NULL) {
+            goto fail;
+        }
+        for (idx = 0; idx < S_size; idx++) {
+            env->S[idx] = create_s();
+            if (env->S[idx] == NULL) {
+                goto fail;
+            }
+        }
+    }
+
+    if (I_size != 0) {
+        env->I = calloc(I_size, sizeof *env->I);
+        if (env->I == NULL) {
+            goto fail;
+        }
+    }
+
+    if (B_size != 0) {
+        env->B = calloc(B_size, sizeof *env->B);
+        if (env->B == NULL) {
+            goto fail;
+        }
+    }
+
+    return env;
+
+fail:
+    SN_close_env(env, S_size);
+    return NULL;
+}
+
+/* Release an environment obtained from SN_create_env().
+ *
+ * z may be NULL (no-op).  z may also be only partially built, because
+ * SN_create_env() calls this from its error path: the string table z->S can
+ * still be NULL although S_size is non-zero, and entries after a failed
+ * create_s() are still NULL from calloc().  Both cases are tolerated here.
+ * S_size must be the value that was given to SN_create_env().
+ */
+extern void SN_close_env(struct SN_env * z, int S_size)
+{
+    if (z == NULL) return;
+    if (z->S != NULL) /* NULL when S_size was 0 or when its calloc() failed */
+    {
+        int i;
+        for (i = 0; i < S_size; i++)
+        {
+            if (z->S[i] != NULL) lose_s(z->S[i]);
+        }
+        free(z->S);
+    }
+    free(z->I);
+    free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+/* Load a new word into the environment.
+ *
+ * Replaces positions 0 .. z->l of the current string with the `size` symbols
+ * at `s` through replace_s(), then resets z->c to 0.  The value returned is
+ * replace_s()'s result, passed through unchanged.
+ */
+extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
+{
+    const int status = replace_s(z, 0, z->l, size, s, NULL);
+
+    z->c = 0;
+    return status;
+}
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,26 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+ More precisely, replace 'char' with whatever type guarantees the
+ character width you need. Note however that sizeof(symbol) should divide
+ HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+ there is an alignment problem. In the unlikely event of a problem here,
+ consult Martin Porter.
+
+*/
+
+struct SN_env { /* Working state of one stemmer instance; built by SN_create_env(), freed by SN_close_env(). */
+ symbol * p; /* current string; allocated with create_s(), released with lose_s() */
+ int c; int l; int lb; int bra; int ket; /* c is reset to 0 by SN_set_current(); l is the length reported by sb_stemmer_length(); lb, bra and ket are presumably cursor/slice bounds used by the generated stemmers - TODO confirm */
+ symbol * * S; /* S_size strings, each from create_s(); NULL when S_size is 0 */
+ int * I; /* I_size ints from calloc(); NULL when I_size is 0 */
+ unsigned char * B; /* B_size bytes from calloc(); NULL when B_size is 0 */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z, int S_size);
+
+extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,58 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Aliases for the <limits.h> extremes of int. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Size in bytes of the bookkeeping header stored in front of every symbol
+   buffer: two ints, [capacity][size], immediately before the first symbol.
+   Parenthesized so the macro keeps its value inside any larger expression
+   (e.g. `x / HEAD` or `x % HEAD`). */
+#define HEAD (2 * sizeof(int))
+
+/* Header accessors, given a pointer p to the first symbol of a buffer.
+   SIZE(p) and CAPACITY(p) are lvalues; SET_SIZE(p, n) is an assignment
+   expression. All three are fully parenthesized so that neither the
+   arguments nor the surrounding expression can re-associate them. */
+#define SIZE(p) (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p) (((int *)(p))[-2])
+
+/* One entry of a search table consulted by find_among / find_among_b. */
+struct among {
+    int s_size;                         /* length of s, in symbols */
+    const symbol * s;                   /* the string to look for */
+    int substring_i;                    /* table index tried next when this entry is rejected; negative ends the chain */
+    int result;                         /* value returned when this entry is accepted */
+    int (* function)(struct SN_env *);  /* optional extra test; 0 means none, otherwise a nonzero return accepts the entry */
+};
+/* Symbol buffer allocation: create_s returns NULL on failure; lose_s accepts NULL. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+/* Move n UTF-8 characters from position c (backwards when n is negative); returns the new position or -1. */
+extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
+/* Grouping tests on UTF-8 text: s is a bitmap covering character codes min..max; the _b forms look at the character before the cursor. */
+extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+/* The same grouping tests treating every symbol as one character. */
+extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+/* Literal comparison at the cursor (forwards) or ending at the cursor (_b); return 1 and move the cursor on a match, else 0. */
+extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
+extern int eq_v(struct SN_env * z, const symbol * p);
+extern int eq_v_b(struct SN_env * z, const symbol * p);
+/* Table lookup at the cursor; return the result field of the accepted entry, or 0 when none is accepted. */
+extern int find_among(struct SN_env * z, const struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
+/* Editing the current string; each returns 0 on success and -1 on failure. */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
+extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
+extern int slice_from_v(struct SN_env * z, const symbol * p);
+extern int slice_del(struct SN_env * z);
+/* Replace the range bra..ket and shift the environment's own bra/ket marks by the size change. */
+extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
+extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
+/* Copy the slice, or the whole string up to l, into buffer p; return the (possibly reallocated) buffer or NULL. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+/* NOTE(review): the only definition of debug visible in utilities.c is compiled out with #if 0. */
+extern void debug(struct SN_env * z, int number, int line_count);
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,478 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+/* unless(C) reads as "if not C". */
+#define unless(C) if(!(C))
+/* Capacity, in symbols, given to a buffer by create_s(). */
+#define CREATE_SIZE 1
+
+/* Allocate an empty symbol buffer with room for CREATE_SIZE symbols.
+
+   The allocation begins with a HEAD-byte header holding the capacity and
+   the current size; the returned pointer addresses the first symbol, just
+   past that header. Returns NULL if malloc fails. Release with lose_s(). */
+extern symbol * create_s(void) {
+    symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    /* Nothing has been stored yet, so the logical length is 0. Reporting
+       CREATE_SIZE here would make one uninitialized symbol count as
+       content: replace_s() would copy it and eq_v()/eq_v_b() would compare
+       against it for a string variable that was never assigned. */
+    SET_SIZE(p, 0);
+    return p;
+}
+
+/* Release a buffer obtained from create_s(); a NULL argument is ignored. */
+extern void lose_s(symbol * p) {
+    if (p != NULL) {
+        /* the allocation starts HEAD bytes before the first symbol */
+        free((char *) p - HEAD);
+    }
+}
+
+/*
+ new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
+ if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
+ position, or -1 on failure.
+
+ -- used to implement hop and next in the utf8 case.
+*/
+
+extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
+    if (n < 0) {
+        /* Backwards: step over -n characters, never moving below lb. */
+        while (n < 0) {
+            if (c <= lb) return -1;
+            c--;
+            if (p[c] >= 0x80) {
+                /* Inside a multi-byte sequence: keep going back until the
+                   lead byte (11xxxxxx) or the limit is reached. */
+                while (c > lb && p[c] < 0xC0) c--;
+            }
+            n++;
+        }
+        return c;
+    }
+
+    /* Forwards: step over n characters, never moving past l. */
+    while (n > 0) {
+        if (c >= l) return -1;
+        if (p[c++] >= 0xC0) {
+            /* Lead byte: also consume its continuation bytes (10xxxxxx). */
+            while (c < l && p[c] >= 0x80 && p[c] < 0xC0) c++;
+        }
+        n--;
+    }
+    return c;
+}
+
+/* Code for character groupings: utf8 cases */
+
+/* Decode the character starting at p[c], reading no further than l.
+   Stores the character code in *slot and returns the number of symbols
+   used (1 to 3), or 0 when c is already at the limit. A sequence cut short
+   by the limit is decoded from the bytes that are available. */
+static int get_utf8(const symbol * p, int c, int l, int * slot) {
+    int lead, second;
+
+    if (c >= l) return 0;
+
+    lead = p[c];
+    if (lead < 0xC0 || c + 1 == l) {   /* below 1100 0000, or nothing follows */
+        *slot = lead;
+        return 1;
+    }
+
+    second = p[c + 1];
+    if (lead < 0xE0 || c + 2 == l) {   /* below 1110 0000, or nothing follows */
+        *slot = (lead & 0x1F) << 6 | (second & 0x3F);
+        return 2;
+    }
+
+    *slot = (lead & 0xF) << 12 | (second & 0x3F) << 6 | (p[c + 2] & 0x3F);
+    return 3;
+}
+
+/* Decode the character that ends just before p[c], reading no further
+   back than lb. Stores the character code in *slot and returns the number
+   of symbols used (1 to 3), or 0 when c is already at the limit. */
+static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
+    int b0, b1;
+    if (c <= lb) return 0;
+    b0 = p[--c];                  /* last byte of the character */
+    if (b0 < 0x80 || c == lb) {   /* 1000 0000 */
+        * slot = b0; return 1;
+    }
+    b1 = p[--c];                  /* the byte before it */
+    if (b1 >= 0xC0 || c == lb) {  /* 1100 0000 */
+        * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
+    }
+    /* Three-byte sequence: the lead byte sits one position further back.
+       c > lb holds here, so p[c - 1] is inside the permitted range. The
+       previous code read p[c], which is b1 again, and so took the top four
+       bits of the result from the middle byte instead of the lead byte. */
+    * slot = (p[c - 1] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
+}
+
+/* Forward test for "character at the cursor is in grouping s" (UTF-8).
+   s is a bitmap covering codes min..max. While the character is a member
+   the cursor moves past it; with repeat set this continues over following
+   members. Returns 0 after a successful non-repeating step, the width of
+   the first non-member met (cursor left in front of it), or -1 when the
+   limit is reached. */
+extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        const int w = get_utf8(z->p, z->c, z->l, &ch);
+        if (w == 0) return -1;
+        if (ch > max || ch < min) return w;
+        ch -= min;
+        if ((s[ch >> 3] & (1 << (ch & 7))) == 0) return w;
+        z->c += w;
+        if (!repeat) return 0;
+    }
+}
+
+/* Backward counterpart of in_grouping_U: examines the character ending at
+   the cursor and moves the cursor down over members, stopping at z->lb. */
+extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        const int w = get_b_utf8(z->p, z->c, z->lb, &ch);
+        if (w == 0) return -1;
+        if (ch > max || ch < min) return w;
+        ch -= min;
+        if ((s[ch >> 3] & (1 << (ch & 7))) == 0) return w;
+        z->c -= w;
+        if (!repeat) return 0;
+    }
+}
+
+/* Forward test for "character at the cursor is NOT in grouping s" (UTF-8).
+   While the character is a non-member the cursor moves past it; with
+   repeat set this continues over following non-members. Returns 0 after a
+   successful non-repeating step, the width of the first member met (cursor
+   left in front of it), or -1 when the limit is reached. */
+extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        const int w = get_utf8(z->p, z->c, z->l, &ch);
+        if (w == 0) return -1;
+        if (ch >= min && ch <= max) {
+            const int bit = ch - min;
+            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return w;
+        }
+        z->c += w;
+        if (!repeat) return 0;
+    }
+}
+
+/* Backward counterpart of out_grouping_U: examines the character ending at
+   the cursor and moves the cursor down over non-members, stopping at z->lb. */
+extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        const int w = get_b_utf8(z->p, z->c, z->lb, &ch);
+        if (w == 0) return -1;
+        if (ch >= min && ch <= max) {
+            const int bit = ch - min;
+            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return w;
+        }
+        z->c -= w;
+        if (!repeat) return 0;
+    }
+}
+
+/* Code for character groupings: non-utf8 cases */
+
+/* in_grouping_U for text where every symbol is one character: the width
+   reported for a non-member is always 1. */
+extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        if (z->c >= z->l) return -1;
+        ch = z->p[z->c];
+        if (ch > max || ch < min) return 1;
+        ch -= min;
+        if ((s[ch >> 3] & (1 << (ch & 7))) == 0) return 1;
+        z->c++;
+        if (!repeat) return 0;
+    }
+}
+
+/* in_grouping_b_U for text where every symbol is one character. */
+extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        if (z->c <= z->lb) return -1;
+        ch = z->p[z->c - 1];
+        if (ch > max || ch < min) return 1;
+        ch -= min;
+        if ((s[ch >> 3] & (1 << (ch & 7))) == 0) return 1;
+        z->c--;
+        if (!repeat) return 0;
+    }
+}
+
+/* out_grouping_U for text where every symbol is one character: the width
+   reported for a member is always 1. */
+extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        if (z->c >= z->l) return -1;
+        ch = z->p[z->c];
+        if (ch >= min && ch <= max) {
+            const int bit = ch - min;
+            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return 1;
+        }
+        z->c++;
+        if (!repeat) return 0;
+    }
+}
+
+/* out_grouping_b_U for text where every symbol is one character. */
+extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        if (z->c <= z->lb) return -1;
+        ch = z->p[z->c - 1];
+        if (ch >= min && ch <= max) {
+            const int bit = ch - min;
+            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return 1;
+        }
+        z->c--;
+        if (!repeat) return 0;
+    }
+}
+
+/* Does the text starting at the cursor begin with the s_size symbols at s?
+   On a match the cursor moves past them and 1 is returned; otherwise the
+   cursor is untouched and 0 is returned. */
+extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
+    const int available = z->l - z->c;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Does the text ending at the cursor end with the s_size symbols at s?
+   On a match the cursor moves back over them and 1 is returned; otherwise
+   the cursor is untouched and 0 is returned. */
+extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
+    const int available = z->c - z->lb;
+
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s() against a symbol buffer, whose length is read from its header. */
+extern int eq_v(struct SN_env * z, const symbol * p) {
+    const int length = SIZE(p);
+
+    return eq_s(z, length, p);
+}
+
+/* eq_s_b() against a symbol buffer, whose length is read from its header. */
+extern int eq_v_b(struct SN_env * z, const symbol * p) {
+    const int length = SIZE(p);
+
+    return eq_s_b(z, length, p);
+}
+/* Look up the text starting at the cursor in the table v of v_size entries.
+   A binary search (which presumes the entries are in ascending order --
+   confirm against the table generator) narrows down to one entry; from
+   there the substring_i links are followed until an entry is found that
+   was matched in full and whose function, if any, returns nonzero. On
+   success the cursor is placed after the matched string and the entry's
+   result is returned; 0 is returned when the chain ends without a match. */
+extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
+ /* i and j bracket the search: entry i compared <= the text, entry j (if any) compared greater */
+ int i = 0;
+ int j = v_size;
+
+ int c = z->c; int l = z->l;
+ symbol * q = z->p + c;
+
+ const struct among * w;
+ /* number of leading symbols known to match at the lower and upper bound */
+ int common_i = 0;
+ int common_j = 0;
+
+ int first_key_inspected = 0;
+
+ while(1) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j; /* smaller */
+ w = v + k;
+ {
+ int i2; for (i2 = common; i2 < w->s_size; i2++) {
+ if (c + common == l) { diff = -1; break; } /* text exhausted: it sorts before this entry */
+ diff = q[common] - w->s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ }
+ if (diff < 0) { j = k; common_j = common; }
+ else { i = k; common_i = common; }
+ if (j - i <= 1) {
+ if (i > 0) break; /* v->s has been inspected */
+ if (j == i) break; /* only one item in v */
+
+ /* - but now we need to go round once more to get
+ v->s inspected. This looks messy, but is actually
+ the optimal approach. */
+
+ if (first_key_inspected) break;
+ first_key_inspected = 1;
+ }
+ }
+ while(1) {
+ w = v + i;
+ if (common_i >= w->s_size) { /* the whole of this entry matched */
+ z->c = c + w->s_size;
+ if (w->function == 0) return w->result;
+ {
+ int res = w->function(z);
+ z->c = c + w->s_size; /* restore the cursor in case the function moved it */
+ if (res) return w->result;
+ }
+ }
+ i = w->substring_i;
+ if (i < 0) return 0;
+ }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+/* Here the text is read leftwards from the symbol before the cursor, each entry is compared from its last symbol to its first, the limit is z->lb, and on success the cursor is placed in front of the matched string. */
+extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
+
+ int i = 0;
+ int j = v_size;
+
+ int c = z->c; int lb = z->lb;
+ symbol * q = z->p + c - 1;
+
+ const struct among * w;
+
+ int common_i = 0;
+ int common_j = 0;
+
+ int first_key_inspected = 0;
+
+ while(1) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j;
+ w = v + k;
+ {
+ int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
+ if (c - common == lb) { diff = -1; break; } /* text exhausted at the backward limit */
+ diff = q[- common] - w->s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ }
+ if (diff < 0) { j = k; common_j = common; }
+ else { i = k; common_i = common; }
+ if (j - i <= 1) {
+ if (i > 0) break;
+ if (j == i) break;
+ if (first_key_inspected) break;
+ first_key_inspected = 1;
+ }
+ }
+ while(1) {
+ w = v + i;
+ if (common_i >= w->s_size) { /* the whole of this entry matched */
+ z->c = c - w->s_size;
+ if (w->function == 0) return w->result;
+ {
+ int res = w->function(z);
+ z->c = c - w->s_size; /* restore the cursor in case the function moved it */
+ if (res) return w->result;
+ }
+ }
+ i = w->substring_i;
+ if (i < 0) return 0;
+ }
+}
+
+
+/* Grow the buffer p so that it can hold at least n symbols (20 spare
+ * symbols are added on top). Returns the new location of the buffer, which
+ * may differ from p. If memory runs out the old buffer is released and
+ * NULL is returned.
+ */
+static symbol * increase_size(symbol * p, int n) {
+    const int new_capacity = n + 20;
+    void * block = realloc((char *) p - HEAD,
+                           HEAD + (new_capacity + 1) * sizeof(symbol));
+    symbol * grown;
+
+    if (block == NULL) {
+        lose_s(p);
+        return NULL;
+    }
+    grown = (symbol *) ((char *) block + HEAD);
+    CAPACITY(grown) = new_capacity;
+    return grown;
+}
+
+/* Replace the symbols z->p[c_bra .. c_ket) by the s_size symbols at s.
+   The tail of the string is shifted, z->l follows the change in length,
+   and the cursor is shifted with the tail or, if it was strictly inside
+   the replaced range, moved to c_bra. When adjptr is not NULL the change
+   in length is stored through it.
+   Returns 0 on success, -1 on error; on error z->p has been freed and
+   set to NULL. */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
+{
+    int delta;
+    int old_len;
+
+    if (z->p == NULL) {
+        z->p = create_s();
+        if (z->p == NULL) return -1;
+    }
+
+    delta = s_size - (c_ket - c_bra);
+    old_len = SIZE(z->p);
+
+    if (delta != 0) {
+        const int new_len = old_len + delta;
+
+        if (new_len > CAPACITY(z->p)) {
+            z->p = increase_size(z->p, new_len);
+            if (z->p == NULL) return -1;
+        }
+        /* open or close the gap by moving everything from c_ket onwards */
+        memmove(z->p + c_ket + delta,
+                z->p + c_ket,
+                (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        if (z->c >= c_ket) {
+            z->c += delta;
+        } else if (z->c > c_bra) {
+            z->c = c_bra;
+        }
+    }
+
+    if (s_size != 0) {
+        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    }
+    if (adjptr != NULL) {
+        *adjptr = delta;
+    }
+    return 0;
+}
+
+/* Validate the slice marks before a slice operation: requires
+   0 <= bra <= ket <= l, an allocated string, and l within the string's
+   recorded size. Returns 0 when the slice is usable, -1 otherwise. */
+static int slice_check(struct SN_env * z) {
+    const int usable = z->bra >= 0 &&
+                       z->bra <= z->ket &&
+                       z->ket <= z->l &&
+                       z->p != NULL &&
+                       z->l <= SIZE(z->p);   /* this last test could be removed */
+
+    if (!usable) {
+#if 0
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+#endif
+        return -1;
+    }
+    return 0;
+}
+
+/* Replace the current slice [bra, ket) by the s_size symbols at s.
+   Returns 0 on success, -1 if the slice is invalid or memory runs out. */
+extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
+    if (slice_check(z) != 0) {
+        return -1;
+    }
+    return replace_s(z, z->bra, z->ket, s_size, s, NULL);
+}
+
+/* slice_from_s() taking the replacement from a symbol buffer. */
+extern int slice_from_v(struct SN_env * z, const symbol * p) {
+    const int length = SIZE(p);
+
+    return slice_from_s(z, length, p);
+}
+
+/* Delete the current slice, i.e. replace it by the empty string. */
+extern int slice_del(struct SN_env * z) {
+    return slice_from_s(z, 0, NULL);
+}
+
+/* Replace z->p[bra .. ket) by the s_size symbols at s, then move the
+   environment's slice marks z->bra and z->ket by the change in length
+   whenever they lie at or after the position bra.
+   Returns 0 on success, -1 on failure. */
+extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
+    int delta;
+
+    if (replace_s(z, bra, ket, s_size, s, &delta) != 0) {
+        return -1;
+    }
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+    return 0;
+}
+
+/* insert_s() taking the inserted text from a symbol buffer, whose length
+   is read from its header. */
+extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
+    return insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice [bra, ket) into the buffer p, growing p when
+   needed. Returns the buffer (possibly at a new address). If the slice is
+   invalid or memory runs out, p is released and NULL is returned. */
+extern symbol * slice_to(struct SN_env * z, symbol * p) {
+    int length;
+
+    if (slice_check(z) != 0) {
+        lose_s(p);
+        return NULL;
+    }
+
+    length = z->ket - z->bra;
+    if (length > CAPACITY(p)) {
+        p = increase_size(p, length);
+        if (p == NULL) return NULL;
+    }
+    memmove(p, z->p + z->bra, length * sizeof(symbol));
+    SET_SIZE(p, length);
+    return p;
+}
+
+/* Copy the string z->p[0 .. z->l) into the buffer p, growing p when
+   needed. Returns the buffer (possibly at a new address), or NULL if
+   memory runs out, in which case p has been released. */
+extern symbol * assign_to(struct SN_env * z, symbol * p) {
+    const int length = z->l;
+
+    if (length > CAPACITY(p)) {
+        p = increase_size(p, length);
+        if (p == NULL) return NULL;
+    }
+    memmove(p, z->p, length * sizeof(symbol));
+    SET_SIZE(p, length);
+    return p;
+}
+
+#if 0 /* debugging aid, compiled out: prints the string with the markers { lb, [ bra, | cursor, ] ket and } l at their positions */
+extern void debug(struct SN_env * z, int number, int line_count) {
+ int i;
+ int limit = SIZE(z->p);
+ /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
+ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+ for (i = 0; i <= limit; i++) { /* i == limit is included so that markers at the very end are printed */
+ if (z->lb == i) printf("{");
+ if (z->bra == i) printf("[");
+ if (z->c == i) printf("|");
+ if (z->ket == i) printf("]");
+ if (z->l == i) printf("}");
+ if (i < limit)
+ { int ch = z->p[i];
+ if (ch == 0) ch = '#'; /* show a zero symbol as '#' */
+ printf("%c", ch);
+ }
+ }
+ printf("'\n");
+}
+#endif
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,339 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "../runtime/header.h"
+/* Forward declarations: the public stem entry point, the file-local routines it calls, and the environment constructor/destructor. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int danish_UTF_8_stem(struct SN_env * z);
+#ifdef __cplusplus
+}
+#endif
+static int r_undouble(struct SN_env * z);
+static int r_other_suffix(struct SN_env * z);
+static int r_consonant_pair(struct SN_env * z);
+static int r_main_suffix(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+extern struct SN_env * danish_UTF_8_create_env(void);
+extern void danish_UTF_8_close_env(struct SN_env * z);
+
+
+#ifdef __cplusplus
+}
+#endif
+static const symbol s_0_0[3] = { 'h', 'e', 'd' }; /* s_0_N: the suffix strings of table a_0, searched backwards by r_main_suffix */
+static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
+static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
+static const symbol s_0_3[1] = { 'e' };
+static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
+static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
+static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
+static const symbol s_0_7[3] = { 'e', 'n', 'e' };
+static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
+static const symbol s_0_9[3] = { 'e', 'r', 'e' };
+static const symbol s_0_10[2] = { 'e', 'n' };
+static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
+static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
+static const symbol s_0_13[2] = { 'e', 'r' };
+static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
+static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
+static const symbol s_0_16[1] = { 's' };
+static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
+static const symbol s_0_18[2] = { 'e', 's' };
+static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
+static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
+static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
+static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
+static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
+static const symbol s_0_24[3] = { 'e', 'n', 's' };
+static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
+static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
+static const symbol s_0_27[3] = { 'e', 'r', 's' };
+static const symbol s_0_28[3] = { 'e', 't', 's' };
+static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
+static const symbol s_0_30[2] = { 'e', 't' };
+static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
+/* Each entry is { s_size, s, substring_i, result, function }. substring_i names a shorter entry that is a suffix of this one (-1 if none); r_main_suffix deletes unconditionally for result 1 and only after a g_s_ending character for result 2. */
+static const struct among a_0[32] =
+{
+/* 0 */ { 3, s_0_0, -1, 1, 0},
+/* 1 */ { 5, s_0_1, 0, 1, 0},
+/* 2 */ { 4, s_0_2, -1, 1, 0},
+/* 3 */ { 1, s_0_3, -1, 1, 0},
+/* 4 */ { 5, s_0_4, 3, 1, 0},
+/* 5 */ { 4, s_0_5, 3, 1, 0},
+/* 6 */ { 6, s_0_6, 5, 1, 0},
+/* 7 */ { 3, s_0_7, 3, 1, 0},
+/* 8 */ { 4, s_0_8, 3, 1, 0},
+/* 9 */ { 3, s_0_9, 3, 1, 0},
+/* 10 */ { 2, s_0_10, -1, 1, 0},
+/* 11 */ { 5, s_0_11, 10, 1, 0},
+/* 12 */ { 4, s_0_12, 10, 1, 0},
+/* 13 */ { 2, s_0_13, -1, 1, 0},
+/* 14 */ { 5, s_0_14, 13, 1, 0},
+/* 15 */ { 4, s_0_15, 13, 1, 0},
+/* 16 */ { 1, s_0_16, -1, 2, 0},
+/* 17 */ { 4, s_0_17, 16, 1, 0},
+/* 18 */ { 2, s_0_18, 16, 1, 0},
+/* 19 */ { 5, s_0_19, 18, 1, 0},
+/* 20 */ { 7, s_0_20, 19, 1, 0},
+/* 21 */ { 4, s_0_21, 18, 1, 0},
+/* 22 */ { 5, s_0_22, 18, 1, 0},
+/* 23 */ { 4, s_0_23, 18, 1, 0},
+/* 24 */ { 3, s_0_24, 16, 1, 0},
+/* 25 */ { 6, s_0_25, 24, 1, 0},
+/* 26 */ { 5, s_0_26, 24, 1, 0},
+/* 27 */ { 3, s_0_27, 16, 1, 0},
+/* 28 */ { 3, s_0_28, 16, 1, 0},
+/* 29 */ { 5, s_0_29, 28, 1, 0},
+/* 30 */ { 2, s_0_30, -1, 1, 0},
+/* 31 */ { 4, s_0_31, 30, 1, 0}
+};
+/* Table a_1: the two-letter endings tested by r_consonant_pair; only whether one matches is used there (result is -1 for all). */
+static const symbol s_1_0[2] = { 'g', 'd' };
+static const symbol s_1_1[2] = { 'd', 't' };
+static const symbol s_1_2[2] = { 'g', 't' };
+static const symbol s_1_3[2] = { 'k', 't' };
+
+static const struct among a_1[4] =
+{
+/* 0 */ { 2, s_1_0, -1, -1, 0},
+/* 1 */ { 2, s_1_1, -1, -1, 0},
+/* 2 */ { 2, s_1_2, -1, -1, 0},
+/* 3 */ { 2, s_1_3, -1, -1, 0}
+};
+/* Table a_2: endings handled by r_other_suffix; result 1 is deleted, result 2 is replaced by s_2. */
+static const symbol s_2_0[2] = { 'i', 'g' };
+static const symbol s_2_1[3] = { 'l', 'i', 'g' };
+static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
+static const symbol s_2_3[3] = { 'e', 'l', 's' };
+static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; /* 0xC3 0xB8 is one two-byte UTF-8 character */
+
+static const struct among a_2[5] =
+{
+/* 0 */ { 2, s_2_0, -1, 1, 0},
+/* 1 */ { 3, s_2_1, 0, 1, 0},
+/* 2 */ { 4, s_2_2, 1, 1, 0},
+/* 3 */ { 3, s_2_3, -1, 1, 0},
+/* 4 */ { 5, s_2_4, -1, 2, 0}
+};
+/* Grouping bitmaps: bit (ch - min) is set when character code ch belongs to the grouping. Callers pass min 97, max 248 for g_v and min 97, max 229 for g_s_ending. */
+static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
+
+static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
+/* Literal strings used by r_other_suffix. */
+static const symbol s_0[] = { 's', 't' };
+static const symbol s_1[] = { 'i', 'g' };
+static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
+/* Set the two marks used by the suffix routines. I[1] (x in the comments
+   below) is the position three characters past the cursor. I[0] (p1) is
+   the position just after the first character outside g_v that follows a
+   character in g_v, raised to I[1] when it would be smaller.
+   Returns 1 on success, 0 when the text ends before a step can complete
+   (I[0] is then left at z->l unless the hop itself failed first). */
+static int r_mark_regions(struct SN_env * z) {
+ z->I[0] = z->l;
+ { int c_test = z->c; /* test, line 33 */
+ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
+ if (ret < 0) return 0;
+ z->c = ret; /* hop, line 33 */
+ }
+ z->I[1] = z->c; /* setmark x, line 33 */
+ z->c = c_test; /* a test leaves the cursor where it was */
+ }
+ if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
+ { /* gopast */ /* non v, line 34 */
+ int ret = in_grouping_U(z, g_v, 97, 248, 1);
+ if (ret < 0) return 0;
+ z->c += ret; /* step over the character that ended the run */
+ }
+ z->I[0] = z->c; /* setmark p1, line 34 */
+ /* try, line 35 */
+ if (!(z->I[0] < z->I[1])) goto lab0;
+ z->I[0] = z->I[1];
+lab0:
+ return 1;
+}
+/* Backward step: with the backward limit temporarily raised to I[0], look
+   for the longest a_0 ending before the cursor and delete it (result 2
+   only when the character before it is in g_s_ending).
+   Returns 1 when a deletion was made, 0 when nothing applies, and a
+   negative value when a slice operation fails. */
+static int r_main_suffix(struct SN_env * z) {
+ int among_var;
+ { int mlimit; /* setlimit, line 41 */
+ int m1 = z->l - z->c; (void)m1;
+ if (z->c < z->I[0]) return 0;
+ z->c = z->I[0]; /* tomark, line 41 */
+ mlimit = z->lb; z->lb = z->c;
+ z->c = z->l - m1;
+ z->ket = z->c; /* [, line 41 */
+ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: the last byte must be d, e, n, r, s or t, the final letters of the a_0 entries */
+ among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
+ if (!(among_var)) { z->lb = mlimit; return 0; }
+ z->bra = z->c; /* ], line 41 */
+ z->lb = mlimit; /* restore the caller's backward limit */
+ }
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ { int ret = slice_del(z); /* delete, line 48 */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 2:
+ if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; /* nonzero means the preceding character is not in g_s_ending */
+ { int ret = slice_del(z); /* delete, line 50 */
+ if (ret < 0) return ret;
+ }
+ break;
+ }
+ return 1;
+}
+/* Backward step: if, within the region starting at I[0], the text before
+   the cursor ends in one of the a_1 pairs, delete its last character.
+   Returns 1 when a deletion was made, 0 when nothing applies, and a
+   negative value when the slice operation fails. */
+static int r_consonant_pair(struct SN_env * z) {
+ { int m_test = z->l - z->c; /* test, line 55 */
+ { int mlimit; /* setlimit, line 56 */
+ int m1 = z->l - z->c; (void)m1;
+ if (z->c < z->I[0]) return 0;
+ z->c = z->I[0]; /* tomark, line 56 */
+ mlimit = z->lb; z->lb = z->c;
+ z->c = z->l - m1;
+ z->ket = z->c; /* [, line 56 */
+ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } /* quick reject: needs two symbols, the last being 'd' (100) or 't' (116) */
+ if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */
+ z->bra = z->c; /* ], line 56 */
+ z->lb = mlimit;
+ }
+ z->c = z->l - m_test; /* the test is over: put the cursor back */
+ }
+ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
+ if (ret < 0) return 0;
+ z->c = ret; /* next, line 62 */
+ }
+ z->bra = z->c; /* ], line 62 */
+ { int ret = slice_del(z); /* delete, line 62 */
+ if (ret < 0) return ret;
+ }
+ return 1;
+}
+/* Backward step in two parts. First, if the text before the cursor ends in
+   s_0 preceded by s_1, the s_0 part is deleted. Then, within the region
+   starting at I[0], the longest a_2 ending is looked up: result 1 deletes
+   it and tries r_consonant_pair, result 2 replaces it by s_2.
+   Returns 1 when the a_2 part applied, 0 when it did not, and a negative
+   value when a slice operation fails. */
+static int r_other_suffix(struct SN_env * z) {
+ int among_var;
+ { int m1 = z->l - z->c; (void)m1; /* do, line 66 */
+ z->ket = z->c; /* [, line 66 */
+ if (!(eq_s_b(z, 2, s_0))) goto lab0;
+ z->bra = z->c; /* ], line 66 */
+ if (!(eq_s_b(z, 2, s_1))) goto lab0;
+ { int ret = slice_del(z); /* delete, line 66 */
+ if (ret < 0) return ret;
+ }
+ lab0:
+ z->c = z->l - m1; /* a do block always restores the cursor, measured from the end */
+ }
+ { int mlimit; /* setlimit, line 67 */
+ int m2 = z->l - z->c; (void)m2;
+ if (z->c < z->I[0]) return 0;
+ z->c = z->I[0]; /* tomark, line 67 */
+ mlimit = z->lb; z->lb = z->c;
+ z->c = z->l - m2;
+ z->ket = z->c; /* [, line 67 */
+ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: needs two symbols, the last being g, s or t, the final letters of the a_2 entries */
+ among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
+ if (!(among_var)) { z->lb = mlimit; return 0; }
+ z->bra = z->c; /* ], line 67 */
+ z->lb = mlimit;
+ }
+ switch(among_var) {
+ case 0: return 0;
+ case 1:
+ { int ret = slice_del(z); /* delete, line 70 */
+ if (ret < 0) return ret;
+ }
+ { int m3 = z->l - z->c; (void)m3; /* do, line 70 */
+ { int ret = r_consonant_pair(z);
+ if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
+ if (ret < 0) return ret;
+ }
+ lab1:
+ z->c = z->l - m3;
+ }
+ break;
+ case 2:
+ { int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */
+ if (ret < 0) return ret;
+ }
+ break;
+ }
+ return 1;
+}
+/* Backward step: within the region starting at I[0], if the character
+   before the cursor is outside g_v and is preceded by the same character,
+   delete the one nearest the cursor. The character is kept in S[0].
+   Returns 1 when a deletion was made, 0 when nothing applies, and a
+   negative value on failure. */
+static int r_undouble(struct SN_env * z) {
+ { int mlimit; /* setlimit, line 76 */
+ int m1 = z->l - z->c; (void)m1;
+ if (z->c < z->I[0]) return 0;
+ z->c = z->I[0]; /* tomark, line 76 */
+ mlimit = z->lb; z->lb = z->c;
+ z->c = z->l - m1;
+ z->ket = z->c; /* [, line 76 */
+ if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; }
+ z->bra = z->c; /* ], line 76 */
+ z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
+ if (z->S[0] == 0) return -1; /* -> ch, line 76 */
+ z->lb = mlimit;
+ }
+ if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
+ { int ret = slice_del(z); /* delete, line 78 */
+ if (ret < 0) return ret;
+ }
+ return 1;
+}
+/* Stem the word held in z. r_mark_regions runs forwards; the cursor is
+   then moved to the end and r_main_suffix, r_consonant_pair,
+   r_other_suffix and r_undouble run backwards in that order. A routine
+   returning 0 is simply skipped; a negative return aborts and is passed
+   to the caller. Returns 1 otherwise. */
+extern int danish_UTF_8_stem(struct SN_env * z) {
+ { int c1 = z->c; /* do, line 84 */
+ { int ret = r_mark_regions(z);
+ if (ret == 0) goto lab0; /* call mark_regions, line 84 */
+ if (ret < 0) return ret;
+ }
+ lab0:
+ z->c = c1;
+ }
+ z->lb = z->c; z->c = z->l; /* backwards, line 85 */
+ /* from here on cursor positions are saved as distances from the end (z->l - z->c), which stay valid when the string changes length */
+ { int m2 = z->l - z->c; (void)m2; /* do, line 86 */
+ { int ret = r_main_suffix(z);
+ if (ret == 0) goto lab1; /* call main_suffix, line 86 */
+ if (ret < 0) return ret;
+ }
+ lab1:
+ z->c = z->l - m2;
+ }
+ { int m3 = z->l - z->c; (void)m3; /* do, line 87 */
+ { int ret = r_consonant_pair(z);
+ if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
+ if (ret < 0) return ret;
+ }
+ lab2:
+ z->c = z->l - m3;
+ }
+ { int m4 = z->l - z->c; (void)m4; /* do, line 88 */
+ { int ret = r_other_suffix(z);
+ if (ret == 0) goto lab3; /* call other_suffix, line 88 */
+ if (ret < 0) return ret;
+ }
+ lab3:
+ z->c = z->l - m4;
+ }
+ { int m5 = z->l - z->c; (void)m5; /* do, line 89 */
+ { int ret = r_undouble(z);
+ if (ret == 0) goto lab4; /* call undouble, line 89 */
+ if (ret < 0) return ret;
+ }
+ lab4:
+ z->c = z->l - m5;
+ }
+ z->c = z->lb; /* leave backward mode: cursor back at the start */
+ return 1;
+}
+/* Create an environment for this stemmer: the code above uses S[0], I[0] and I[1], matching the sizes 1, 2, 0 requested here. */
+extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); }
+/* Release an environment made by danish_UTF_8_create_env; the 1 is the number of string variables to free. */
+extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); }
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,16 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+/* Public interface of the Danish UTF-8 stemmer: create an environment, stem the word held in it (returns 1, or a negative value on failure), close the environment. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern struct SN_env * danish_UTF_8_create_env(void);
+extern void danish_UTF_8_close_env(struct SN_env * z);
+
+extern int danish_UTF_8_stem(struct SN_env * z);
+
+#ifdef __cplusplus
+}
+#endif
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,634 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "../runtime/header.h"
+
+#ifdef __cplusplus
+extern "C" { /* C linkage for the public entry point when compiled as C++ */
+#endif
+extern int dutch_UTF_8_stem(struct SN_env * z); /* public entry point, defined at the end of this file */
+#ifdef __cplusplus
+}
+#endif
+static int r_standard_suffix(struct SN_env * z); /* backward suffix removal; called from dutch_UTF_8_stem */
+static int r_undouble(struct SN_env * z); /* drops one character after a dd/kk/tt ending */
+static int r_R2(struct SN_env * z); /* tests z->I[1] <= z->c */
+static int r_R1(struct SN_env * z); /* tests z->I[0] <= z->c */
+static int r_mark_regions(struct SN_env * z); /* fills z->I[0] and z->I[1] */
+static int r_en_ending(struct SN_env * z); /* deletes the marked slice under the 'en' conditions, then undoubles */
+static int r_e_ending(struct SN_env * z); /* deletes a trailing 'e' and records it in z->B[0] */
+static int r_postlude(struct SN_env * z); /* rewrites 'I'/'Y' back to 'i'/'y' */
+static int r_prelude(struct SN_env * z); /* removes accents and marks certain i/y as 'I'/'Y' */
+#ifdef __cplusplus
+extern "C" { /* C linkage for the environment helpers when compiled as C++ */
+#endif
+
+
+extern struct SN_env * dutch_UTF_8_create_env(void); /* defined at the end of this file */
+extern void dutch_UTF_8_close_env(struct SN_env * z); /* defined at the end of this file */
+
+
+#ifdef __cplusplus
+}
+#endif
+static const symbol s_0_1[2] = { 0xC3, 0xA1 }; /* UTF-8 bytes of U+00E1, a with acute */
+static const symbol s_0_2[2] = { 0xC3, 0xA4 }; /* UTF-8 bytes of U+00E4, a with diaeresis */
+static const symbol s_0_3[2] = { 0xC3, 0xA9 }; /* UTF-8 bytes of U+00E9, e with acute */
+static const symbol s_0_4[2] = { 0xC3, 0xAB }; /* UTF-8 bytes of U+00EB, e with diaeresis */
+static const symbol s_0_5[2] = { 0xC3, 0xAD }; /* UTF-8 bytes of U+00ED, i with acute */
+static const symbol s_0_6[2] = { 0xC3, 0xAF }; /* UTF-8 bytes of U+00EF, i with diaeresis */
+static const symbol s_0_7[2] = { 0xC3, 0xB3 }; /* UTF-8 bytes of U+00F3, o with acute */
+static const symbol s_0_8[2] = { 0xC3, 0xB6 }; /* UTF-8 bytes of U+00F6, o with diaeresis */
+static const symbol s_0_9[2] = { 0xC3, 0xBA }; /* UTF-8 bytes of U+00FA, u with acute */
+static const symbol s_0_10[2] = { 0xC3, 0xBC }; /* UTF-8 bytes of U+00FC, u with diaeresis */
+
+static const struct among a_0[11] = /* table searched by r_prelude via find_among; NOTE(review): the 4th field looks like the code r_prelude switches on (1='a', 2='e', 3='i', 4='o', 5='u', 6=advance) -- confirm field meanings against struct among in header.h */
+{
+/* 0 */ { 0, 0, -1, 6, 0}, /* zero-length entry carrying code 6 */
+/* 1 */ { 2, s_0_1, 0, 1, 0},
+/* 2 */ { 2, s_0_2, 0, 1, 0},
+/* 3 */ { 2, s_0_3, 0, 2, 0},
+/* 4 */ { 2, s_0_4, 0, 2, 0},
+/* 5 */ { 2, s_0_5, 0, 3, 0},
+/* 6 */ { 2, s_0_6, 0, 3, 0},
+/* 7 */ { 2, s_0_7, 0, 4, 0},
+/* 8 */ { 2, s_0_8, 0, 4, 0},
+/* 9 */ { 2, s_0_9, 0, 5, 0},
+/* 10 */ { 2, s_0_10, 0, 5, 0}
+};
+
+static const symbol s_1_1[1] = { 'I' }; /* marker written by r_prelude for an 'i' it rewrote */
+static const symbol s_1_2[1] = { 'Y' }; /* marker written by r_prelude for a 'y' it rewrote */
+
+static const struct among a_1[3] = /* table searched by r_postlude via find_among; NOTE(review): 4th field appears to be the switch code there (1 -> 'y', 2 -> 'i', 3 -> advance) -- confirm against header.h */
+{
+/* 0 */ { 0, 0, -1, 3, 0}, /* zero-length entry carrying code 3 */
+/* 1 */ { 1, s_1_1, 0, 2, 0},
+/* 2 */ { 1, s_1_2, 0, 1, 0}
+};
+
+static const symbol s_2_0[2] = { 'd', 'd' };
+static const symbol s_2_1[2] = { 'k', 'k' };
+static const symbol s_2_2[2] = { 't', 't' };
+
+static const struct among a_2[3] = /* doubled consonants looked up backwards by r_undouble via find_among_b; only match/no-match is used there */
+{
+/* 0 */ { 2, s_2_0, -1, -1, 0},
+/* 1 */ { 2, s_2_1, -1, -1, 0},
+/* 2 */ { 2, s_2_2, -1, -1, 0}
+};
+
+static const symbol s_3_0[3] = { 'e', 'n', 'e' };
+static const symbol s_3_1[2] = { 's', 'e' };
+static const symbol s_3_2[2] = { 'e', 'n' };
+static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' };
+static const symbol s_3_4[1] = { 's' };
+
+static const struct among a_3[5] = /* suffixes for the first stage of r_standard_suffix (find_among_b); NOTE(review): 4th field appears to be the switch code there (1 = replace by "heid", 2 = r_en_ending, 3 = delete) -- confirm against header.h */
+{
+/* 0 */ { 3, s_3_0, -1, 2, 0},
+/* 1 */ { 2, s_3_1, -1, 3, 0},
+/* 2 */ { 2, s_3_2, -1, 2, 0},
+/* 3 */ { 5, s_3_3, 2, 1, 0}, /* "heden" ends in entry 2 ("en"); presumably the 3rd field links to that shorter entry -- TODO confirm */
+/* 4 */ { 1, s_3_4, -1, 3, 0}
+};
+
+static const symbol s_4_0[3] = { 'e', 'n', 'd' };
+static const symbol s_4_1[2] = { 'i', 'g' };
+static const symbol s_4_2[3] = { 'i', 'n', 'g' };
+static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' };
+static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' };
+static const symbol s_4_5[3] = { 'b', 'a', 'r' };
+
+static const struct among a_4[6] = /* suffixes for the fourth stage of r_standard_suffix (find_among_b); NOTE(review): 4th field appears to be the switch code used there (1..5) -- confirm against header.h */
+{
+/* 0 */ { 3, s_4_0, -1, 1, 0},
+/* 1 */ { 2, s_4_1, -1, 2, 0},
+/* 2 */ { 3, s_4_2, -1, 1, 0},
+/* 3 */ { 4, s_4_3, -1, 3, 0},
+/* 4 */ { 4, s_4_4, -1, 4, 0},
+/* 5 */ { 3, s_4_5, -1, 5, 0}
+};
+
+static const symbol s_5_0[2] = { 'a', 'a' };
+static const symbol s_5_1[2] = { 'e', 'e' };
+static const symbol s_5_2[2] = { 'o', 'o' };
+static const symbol s_5_3[2] = { 'u', 'u' };
+
+static const struct among a_5[4] = /* doubled vowels looked up backwards by the last stage of r_standard_suffix; only match/no-match is used there */
+{
+/* 0 */ { 2, s_5_0, -1, -1, 0},
+/* 1 */ { 2, s_5_1, -1, -1, 0},
+/* 2 */ { 2, s_5_2, -1, -1, 0},
+/* 3 */ { 2, s_5_3, -1, -1, 0}
+};
+
+static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; /* grouping passed with bounds 97..232; NOTE(review): if bit i (LSB first) stands for code point 97+i this is a, e, i, o, u, y and U+00E8 -- confirm against the grouping helpers in utilities.c */
+
+static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; /* grouping passed with bounds 73..232; under the same assumption: the g_v set plus 'I' (73) */
+
+static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; /* grouping passed with bounds 97..232; under the same assumption: the g_v set plus 'j' (106) */
+
+static const symbol s_0[] = { 'a' }; /* r_prelude replacement for code 1 */
+static const symbol s_1[] = { 'e' }; /* r_prelude replacement for code 2 */
+static const symbol s_2[] = { 'i' }; /* r_prelude replacement for code 3 */
+static const symbol s_3[] = { 'o' }; /* r_prelude replacement for code 4 */
+static const symbol s_4[] = { 'u' }; /* r_prelude replacement for code 5 */
+static const symbol s_5[] = { 'y' }; /* r_prelude: 'y' tested at the starting cursor */
+static const symbol s_6[] = { 'Y' }; /* r_prelude: replacement for that 'y' */
+static const symbol s_7[] = { 'i' }; /* r_prelude: 'i' tested after a g_v character */
+static const symbol s_8[] = { 'I' }; /* r_prelude: replacement for that 'i' */
+static const symbol s_9[] = { 'y' }; /* r_prelude: 'y' tested after a g_v character */
+static const symbol s_10[] = { 'Y' }; /* r_prelude: replacement for that 'y' */
+static const symbol s_11[] = { 'y' }; /* r_postlude replacement for code 1 */
+static const symbol s_12[] = { 'i' }; /* r_postlude replacement for code 2 */
+static const symbol s_13[] = { 'e' }; /* r_e_ending: the ending that is removed */
+static const symbol s_14[] = { 'g', 'e', 'm' }; /* r_en_ending: preceding text that blocks the deletion */
+static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; /* r_standard_suffix: replacement text for a_3 code 1 */
+static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; /* r_standard_suffix: suffix matched in the third stage */
+static const symbol s_17[] = { 'c' }; /* r_standard_suffix: a preceding 'c' blocks the "heid" deletion */
+static const symbol s_18[] = { 'e', 'n' }; /* r_standard_suffix: suffix tried after "heid" was deleted */
+static const symbol s_19[] = { 'i', 'g' }; /* r_standard_suffix: suffix tried after a code-1 a_4 suffix was deleted */
+static const symbol s_20[] = { 'e' }; /* r_standard_suffix: a preceding 'e' blocks that "ig" deletion */
+static const symbol s_21[] = { 'e' }; /* r_standard_suffix: a preceding 'e' blocks the a_4 code-2 deletion */
+
+static int r_prelude(struct SN_env * z) { /* Prelude: (1) replaces the accented vowels of a_0 by plain a/e/i/o/u, (2) turns a 'y' at the starting cursor into 'Y', (3) turns 'i' between two g_v characters into 'I' and 'y' after a g_v character into 'Y'. Returns 1, or a negative slice_from_s result. */
+ int among_var;
+ { int c_test = z->c; /* test, line 42 */ /* cursor is put back to c_test once the loop ends */
+ while(1) { /* repeat, line 42 */
+ int c1 = z->c; /* restore point for a failed iteration */
+ z->bra = z->c; /* [, line 43 */
+ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else /* fast path: fewer than two bytes left, or the second byte is not in 0xA0..0xBF, or its low five bits are not flagged in the mask (one bit per second byte used in a_0) -> code 6 without a table search */
+ among_var = find_among(z, a_0, 11); /* substring, line 43 */
+ if (!(among_var)) goto lab0;
+ z->ket = z->c; /* ], line 43 */
+ switch(among_var) {
+ case 0: goto lab0;
+ case 1:
+ { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ /* s_0 is "a" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 2:
+ { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ /* s_1 is "e" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 3:
+ { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ /* s_2 is "i" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 4:
+ { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ /* s_3 is "o" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 5:
+ { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ /* s_4 is "u" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 6:
+ { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); /* nothing to replace here: step the cursor forward */
+ if (ret < 0) goto lab0; /* a negative result ends the repeat loop */
+ z->c = ret; /* next, line 54 */
+ }
+ break;
+ }
+ continue;
+ lab0:
+ z->c = c1;
+ break;
+ }
+ z->c = c_test;
+ }
+ { int c_keep = z->c; /* try, line 57 */
+ z->bra = z->c; /* [, line 57 */
+ if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } /* no 'y' at the cursor: leave the text unchanged */
+ z->ket = z->c; /* ], line 57 */
+ { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ /* s_6 is "Y" */
+ if (ret < 0) return ret;
+ }
+ lab1:
+ ;
+ }
+ while(1) { /* repeat, line 58 */
+ int c2 = z->c; /* restore point when no further match exists */
+ while(1) { /* goto, line 58 */
+ int c3 = z->c; /* candidate position; the cursor returns here after a match */
+ if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3; /* non-zero is treated as "no g_v character here" */
+ z->bra = z->c; /* [, line 59 */
+ { int c4 = z->c; /* or, line 59 */
+ if (!(eq_s(z, 1, s_7))) goto lab5; /* first alternative: 'i' ... */
+ z->ket = z->c; /* ], line 59 */
+ if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5; /* ... which must itself be followed by a g_v character */
+ { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ /* s_8 is "I" */
+ if (ret < 0) return ret;
+ }
+ goto lab4;
+ lab5:
+ z->c = c4;
+ if (!(eq_s(z, 1, s_9))) goto lab3; /* second alternative: 'y' */
+ z->ket = z->c; /* ], line 60 */
+ { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ /* s_10 is "Y" */
+ if (ret < 0) return ret;
+ }
+ }
+ lab4:
+ z->c = c3;
+ break;
+ lab3:
+ z->c = c3;
+ { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); /* no match at c3: try the next character */
+ if (ret < 0) goto lab2; /* a negative result ends the outer repeat */
+ z->c = ret; /* goto, line 58 */
+ }
+ }
+ continue;
+ lab2:
+ z->c = c2;
+ break;
+ }
+ return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) { /* Sets z->I[0] (tested by r_R1) and z->I[1] (tested by r_R2). Returns 1 when both marks were found, 0 as soon as one of the grouping scans returns a negative value; the marks set so far are kept. */
+ z->I[0] = z->l; /* default: mark at the end limit */
+ z->I[1] = z->l; /* default: mark at the end limit */
+ { /* gopast */ /* grouping v, line 69 */
+ int ret = out_grouping_U(z, g_v, 97, 232, 1);
+ if (ret < 0) return 0;
+ z->c += ret; /* the non-negative result is used as a cursor advance */
+ }
+ { /* gopast */ /* non v, line 69 */
+ int ret = in_grouping_U(z, g_v, 97, 232, 1);
+ if (ret < 0) return 0;
+ z->c += ret;
+ }
+ z->I[0] = z->c; /* setmark p1, line 69 */
+ /* try, line 70 */
+ if (!(z->I[0] < 3)) goto lab0;
+ z->I[0] = 3; /* the first mark is never left below 3 */
+lab0:
+ { /* gopast */ /* grouping v, line 71 */
+ int ret = out_grouping_U(z, g_v, 97, 232, 1);
+ if (ret < 0) return 0;
+ z->c += ret;
+ }
+ { /* gopast */ /* non v, line 71 */
+ int ret = in_grouping_U(z, g_v, 97, 232, 1);
+ if (ret < 0) return 0;
+ z->c += ret;
+ }
+ z->I[1] = z->c; /* setmark p2, line 71 */
+ return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Postlude: walks forward from the cursor and rewrites 'Y' to 'y' and 'I' to 'i'. Returns 1, or a negative slice_from_s result. */
+ int among_var;
+ while(1) { /* repeat, line 75 */
+ int c1 = z->c; /* restore point for a failed iteration */
+ z->bra = z->c; /* [, line 77 */
+ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else /* fast path: at the end limit, or the byte is neither 'I' (73) nor 'Y' (89) -> code 3 without a table search */
+ among_var = find_among(z, a_1, 3); /* substring, line 77 */
+ if (!(among_var)) goto lab0;
+ z->ket = z->c; /* ], line 77 */
+ switch(among_var) {
+ case 0: goto lab0;
+ case 1:
+ { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ /* s_11 is "y" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 2:
+ { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ /* s_12 is "i" */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 3:
+ { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); /* nothing to rewrite: step the cursor forward */
+ if (ret < 0) goto lab0; /* a negative result ends the loop */
+ z->c = ret; /* next, line 80 */
+ }
+ break;
+ }
+ continue;
+ lab0:
+ z->c = c1;
+ break;
+ }
+ return 1;
+}
+
+static int r_R1(struct SN_env * z) { /* Returns 1 when the cursor is at or beyond the mark stored in z->I[0] by r_mark_regions, otherwise 0. */
+ if (!(z->I[0] <= z->c)) return 0;
+ return 1;
+}
+
+static int r_R2(struct SN_env * z) { /* Returns 1 when the cursor is at or beyond the mark stored in z->I[1] by r_mark_regions, otherwise 0. */
+ if (!(z->I[1] <= z->c)) return 0;
+ return 1;
+}
+
+static int r_undouble(struct SN_env * z) { /* If the text before the cursor ends in an a_2 entry (dd, kk, tt), deletes the last character. Returns 1 on success, 0 when nothing matched or the backward step failed, or a negative slice_del result. */
+ { int m_test = z->l - z->c; /* test, line 91 */ /* cursor is kept as a distance from z->l and restored after the lookup */
+ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; /* fast reject: fewer than two bytes before the cursor, or the last byte is not in 0x60..0x7F, or it is not flagged in the mask (bits for 'd', 'k', 't') */
+ if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */
+ z->c = z->l - m_test;
+ }
+ z->ket = z->c; /* [, line 91 */
+ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); /* step back over one character */
+ if (ret < 0) return 0;
+ z->c = ret; /* next, line 91 */
+ }
+ z->bra = z->c; /* ], line 91 */
+ { int ret = slice_del(z); /* delete, line 91 */
+ if (ret < 0) return ret;
+ }
+ return 1;
+}
+
+static int r_e_ending(struct SN_env * z) { /* Deletes an 'e' before the cursor when r_R1 holds and the out_grouping_b_U check on g_v succeeds, records that in z->B[0], then calls r_undouble. Returns 0 when a condition fails -- and also when r_undouble returns 0 after the 'e' was already deleted; negative helper results are propagated. */
+ z->B[0] = 0; /* unset e_found, line 95 */
+ z->ket = z->c; /* [, line 96 */
+ if (!(eq_s_b(z, 1, s_13))) return 0; /* s_13 is "e" */
+ z->bra = z->c; /* ], line 96 */
+ { int ret = r_R1(z);
+ if (ret == 0) return 0; /* call R1, line 96 */
+ if (ret < 0) return ret;
+ }
+ { int m_test = z->l - z->c; /* test, line 96 */
+ if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; /* non-zero is treated as failure; presumably the preceding character must lie outside g_v -- TODO confirm helper semantics */
+ z->c = z->l - m_test;
+ }
+ { int ret = slice_del(z); /* delete, line 96 */
+ if (ret < 0) return ret;
+ }
+ z->B[0] = 1; /* set e_found, line 97 */
+ { int ret = r_undouble(z);
+ if (ret == 0) return 0; /* call undouble, line 98 */
+ if (ret < 0) return ret;
+ }
+ return 1;
+}
+
+static int r_en_ending(struct SN_env * z) { /* Deletes the slice the caller has already marked with ket/bra, provided r_R1 holds, the out_grouping_b_U check on g_v succeeds and the text before the cursor is not "gem"; then calls r_undouble. Returns 0 when a condition fails or r_undouble returns 0 (after the deletion); negative helper results are propagated. */
+ { int ret = r_R1(z);
+ if (ret == 0) return 0; /* call R1, line 102 */
+ if (ret < 0) return ret;
+ }
+ { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ /* both checks below start from the same cursor */
+ if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; /* non-zero is treated as failure; presumably the preceding character must lie outside g_v -- TODO confirm helper semantics */
+ z->c = z->l - m1;
+ { int m2 = z->l - z->c; (void)m2; /* not, line 102 */
+ if (!(eq_s_b(z, 3, s_14))) goto lab0; /* s_14 is "gem"; not matching it is the success path */
+ return 0;
+ lab0:
+ z->c = z->l - m2;
+ }
+ }
+ { int ret = slice_del(z); /* delete, line 102 */
+ if (ret < 0) return ret;
+ }
+ { int ret = r_undouble(z);
+ if (ret == 0) return 0; /* call undouble, line 103 */
+ if (ret < 0) return ret;
+ }
+ return 1;
+}
+
+static int r_standard_suffix(struct SN_env * z) { /* Backward suffix removal in five 'do' stages (m1, m2, m3, m5, m9). Every stage restores the cursor when it finishes, so a failed stage never stops the following ones. Returns 1, or a negative value propagated from a helper. */
+ int among_var;
+ { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ /* stage 1: suffixes of a_3 */
+ z->ket = z->c; /* [, line 108 */
+ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; /* fast reject: nothing before the cursor, or the last byte is not in 0x60..0x7F, or it is not flagged in the mask (bits for 'e', 'n', 's') */
+ among_var = find_among_b(z, a_3, 5); /* substring, line 108 */
+ if (!(among_var)) goto lab0;
+ z->bra = z->c; /* ], line 108 */
+ switch(among_var) {
+ case 0: goto lab0;
+ case 1: /* a_3 code 1 ("heden"): inside R1, replace the suffix by "heid" */
+ { int ret = r_R1(z);
+ if (ret == 0) goto lab0; /* call R1, line 110 */
+ if (ret < 0) return ret;
+ }
+ { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 2: /* a_3 code 2 ("ene", "en"): leave the decision to r_en_ending */
+ { int ret = r_en_ending(z);
+ if (ret == 0) goto lab0; /* call en_ending, line 113 */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 3: /* a_3 code 3 ("se", "s"): delete inside R1 when the g_v_j check passes */
+ { int ret = r_R1(z);
+ if (ret == 0) goto lab0; /* call R1, line 116 */
+ if (ret < 0) return ret;
+ }
+ if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0; /* non-zero is treated as failure; presumably the preceding character must lie outside g_v_j -- TODO confirm helper semantics */
+ { int ret = slice_del(z); /* delete, line 116 */
+ if (ret < 0) return ret;
+ }
+ break;
+ }
+ lab0:
+ z->c = z->l - m1;
+ }
+ { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ /* stage 2: trailing 'e' */
+ { int ret = r_e_ending(z);
+ if (ret == 0) goto lab1; /* call e_ending, line 120 */
+ if (ret < 0) return ret;
+ }
+ lab1:
+ z->c = z->l - m2;
+ }
+ { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ /* stage 3: "heid", then a possible "en" before it */
+ z->ket = z->c; /* [, line 122 */
+ if (!(eq_s_b(z, 4, s_16))) goto lab2; /* s_16 is "heid" */
+ z->bra = z->c; /* ], line 122 */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab2; /* call R2, line 122 */
+ if (ret < 0) return ret;
+ }
+ { int m4 = z->l - z->c; (void)m4; /* not, line 122 */
+ if (!(eq_s_b(z, 1, s_17))) goto lab3; /* s_17 is "c"; a preceding 'c' abandons the stage */
+ goto lab2;
+ lab3:
+ z->c = z->l - m4;
+ }
+ { int ret = slice_del(z); /* delete, line 122 */
+ if (ret < 0) return ret;
+ }
+ z->ket = z->c; /* [, line 123 */
+ if (!(eq_s_b(z, 2, s_18))) goto lab2; /* s_18 is "en" */
+ z->bra = z->c; /* ], line 123 */
+ { int ret = r_en_ending(z);
+ if (ret == 0) goto lab2; /* call en_ending, line 123 */
+ if (ret < 0) return ret;
+ }
+ lab2:
+ z->c = z->l - m3;
+ }
+ { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ /* stage 4: suffixes of a_4 */
+ z->ket = z->c; /* [, line 127 */
+ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; /* fast reject: fewer than two bytes before the cursor, or the last byte is not in 0x60..0x7F, or it is not flagged in the mask (bits for 'd', 'g', 'k', 'r') */
+ among_var = find_among_b(z, a_4, 6); /* substring, line 127 */
+ if (!(among_var)) goto lab4;
+ z->bra = z->c; /* ], line 127 */
+ switch(among_var) {
+ case 0: goto lab4;
+ case 1: /* a_4 code 1 ("end", "ing"): delete inside R2, then try "ig" or fall back to r_undouble */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab4; /* call R2, line 129 */
+ if (ret < 0) return ret;
+ }
+ { int ret = slice_del(z); /* delete, line 129 */
+ if (ret < 0) return ret;
+ }
+ { int m6 = z->l - z->c; (void)m6; /* or, line 130 */
+ z->ket = z->c; /* [, line 130 */
+ if (!(eq_s_b(z, 2, s_19))) goto lab6; /* s_19 is "ig" */
+ z->bra = z->c; /* ], line 130 */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab6; /* call R2, line 130 */
+ if (ret < 0) return ret;
+ }
+ { int m7 = z->l - z->c; (void)m7; /* not, line 130 */
+ if (!(eq_s_b(z, 1, s_20))) goto lab7; /* s_20 is "e"; a preceding 'e' rejects this alternative */
+ goto lab6;
+ lab7:
+ z->c = z->l - m7;
+ }
+ { int ret = slice_del(z); /* delete, line 130 */
+ if (ret < 0) return ret;
+ }
+ goto lab5;
+ lab6:
+ z->c = z->l - m6; /* second alternative starts from the same cursor */
+ { int ret = r_undouble(z);
+ if (ret == 0) goto lab4; /* call undouble, line 130 */
+ if (ret < 0) return ret;
+ }
+ }
+ lab5:
+ break;
+ case 2: /* a_4 code 2 ("ig"): delete inside R2 unless preceded by 'e' */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab4; /* call R2, line 133 */
+ if (ret < 0) return ret;
+ }
+ { int m8 = z->l - z->c; (void)m8; /* not, line 133 */
+ if (!(eq_s_b(z, 1, s_21))) goto lab8; /* s_21 is "e" */
+ goto lab4;
+ lab8:
+ z->c = z->l - m8;
+ }
+ { int ret = slice_del(z); /* delete, line 133 */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 3: /* a_4 code 3 ("lijk"): delete inside R2, then run r_e_ending */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab4; /* call R2, line 136 */
+ if (ret < 0) return ret;
+ }
+ { int ret = slice_del(z); /* delete, line 136 */
+ if (ret < 0) return ret;
+ }
+ { int ret = r_e_ending(z);
+ if (ret == 0) goto lab4; /* call e_ending, line 136 */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 4: /* a_4 code 4 ("baar"): delete inside R2 */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab4; /* call R2, line 139 */
+ if (ret < 0) return ret;
+ }
+ { int ret = slice_del(z); /* delete, line 139 */
+ if (ret < 0) return ret;
+ }
+ break;
+ case 5: /* a_4 code 5 ("bar"): delete inside R2 only when z->B[0] is set */
+ { int ret = r_R2(z);
+ if (ret == 0) goto lab4; /* call R2, line 142 */
+ if (ret < 0) return ret;
+ }
+ if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */
+ { int ret = slice_del(z); /* delete, line 142 */
+ if (ret < 0) return ret;
+ }
+ break;
+ }
+ lab4:
+ z->c = z->l - m5;
+ }
+ { int m9 = z->l - z->c; (void)m9; /* do, line 146 */ /* stage 5: shorten a doubled vowel (a_5) that sits before the final character */
+ if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9; /* non-zero is treated as failure; presumably steps back over one character outside g_v_I -- TODO confirm helper semantics */
+ { int m_test = z->l - z->c; /* test, line 148 */
+ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; /* fast reject: fewer than two bytes before the cursor, or the last byte is not in 0x60..0x7F, or it is not flagged in the mask (bits for 'a', 'e', 'o', 'u') */
+ if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */
+ if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9; /* same kind of check, with g_v, on what precedes the vowel pair */
+ z->c = z->l - m_test; /* back to just after the vowel pair */
+ }
+ z->ket = z->c; /* [, line 152 */
+ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); /* step back over one character, i.e. the second vowel of the pair */
+ if (ret < 0) goto lab9;
+ z->c = ret; /* next, line 152 */
+ }
+ z->bra = z->c; /* ], line 152 */
+ { int ret = slice_del(z); /* delete, line 152 */
+ if (ret < 0) return ret;
+ }
+ lab9:
+ z->c = z->l - m9;
+ }
+ return 1;
+}
+
+extern int dutch_UTF_8_stem(struct SN_env * z) { /* Entry point: runs r_prelude and r_mark_regions forwards, r_standard_suffix backwards, then r_postlude forwards. A step returning 0 is skipped over; the cursor is restored after every step. Returns 1, or the first negative value returned by a step. */
+ { int c1 = z->c; /* do, line 159 */
+ { int ret = r_prelude(z);
+ if (ret == 0) goto lab0; /* call prelude, line 159 */
+ if (ret < 0) return ret;
+ }
+ lab0:
+ z->c = c1;
+ }
+ { int c2 = z->c; /* do, line 160 */
+ { int ret = r_mark_regions(z);
+ if (ret == 0) goto lab1; /* call mark_regions, line 160 */
+ if (ret < 0) return ret;
+ }
+ lab1:
+ z->c = c2;
+ }
+ z->lb = z->c; z->c = z->l; /* backwards, line 161 */ /* the current cursor becomes the backward limit; scanning starts at the end limit */
+
+ { int m3 = z->l - z->c; (void)m3; /* do, line 162 */
+ { int ret = r_standard_suffix(z);
+ if (ret == 0) goto lab2; /* call standard_suffix, line 162 */
+ if (ret < 0) return ret;
+ }
+ lab2:
+ z->c = z->l - m3;
+ }
+ z->c = z->lb; /* back to forward mode: cursor returns to the saved backward limit */
+ { int c4 = z->c; /* do, line 163 */
+ { int ret = r_postlude(z);
+ if (ret == 0) goto lab3; /* call postlude, line 163 */
+ if (ret < 0) return ret;
+ }
+ lab3:
+ z->c = c4;
+ }
+ return 1;
+}
+
+extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } /* returns whatever SN_create_env(0, 2, 1) returns; NOTE(review): the arguments are presumably the string/integer/boolean slot counts, which would fit this file using z->I[0], z->I[1] and z->B[0] -- confirm against runtime/api.h */
+
+extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } /* hands z to SN_close_env; the 0 equals the first argument dutch_UTF_8_create_env passes to SN_create_env -- presumably the string slot count, TODO confirm */
+
Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,16 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#ifdef __cplusplus
+extern "C" { /* C linkage for the declarations below when compiled as C++ */
+#endif
+
+extern struct SN_env * dutch_UTF_8_create_env(void); /* defined in stem_UTF_8_dutch.c as a call to SN_create_env(0, 2, 1) */
+extern void dutch_UTF_8_close_env(struct SN_env * z); /* defined in stem_UTF_8_dutch.c as a call to SN_close_env(z, 0) */
+
+extern int dutch_UTF_8_stem(struct SN_env * z); /* stemming entry point defined in stem_UTF_8_dutch.c; returns 1 or a negative helper result */
+
+#ifdef __cplusplus
+}
+#endif
+