You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/11/10 17:02:41 UTC

[lucy-commits] svn commit: r1033549 [1/9] - in /incubator/lucy/trunk: ./ core/Lucy/Analysis/ devel/conf/ modules/ modules/analysis/ modules/analysis/snowstem/ modules/analysis/snowstem/devel/ modules/analysis/snowstem/source/ modules/analysis/snowstem/source/include/...

Author: marvin
Date: Wed Nov 10 16:02:40 2010
New Revision: 1033549

URL: http://svn.apache.org/viewvc?rev=1033549&view=rev
Log:
LUCY-125
Bundle Snowball stemming libraries with Lucy, eliminating dependency on CPAN
module Lingua::Stem::Snowball.

Added:
    incubator/lucy/trunk/modules/
    incubator/lucy/trunk/modules/analysis/
    incubator/lucy/trunk/modules/analysis/snowstem/
    incubator/lucy/trunk/modules/analysis/snowstem/devel/
    incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl
    incubator/lucy/trunk/modules/analysis/snowstem/source/
    incubator/lucy/trunk/modules/analysis/snowstem/source/include/
    incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/
    incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/
    incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_english.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_english.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_finnish.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_finnish.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_french.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_french.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_german.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_german.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_hungarian.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_hungarian.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_italian.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_italian.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_norwegian.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_norwegian.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_porter.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_porter.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_portuguese.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_portuguese.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_romanian.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_romanian.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_russian.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_russian.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_spanish.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_spanish.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_swedish.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_swedish.h
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_turkish.c
    incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_turkish.h
Removed:
    incubator/lucy/trunk/perl/xs/Lucy/Analysis/Stemmer.c
Modified:
    incubator/lucy/trunk/LICENSE
    incubator/lucy/trunk/NOTICE
    incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c
    incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh
    incubator/lucy/trunk/devel/conf/lucyperl.supp
    incubator/lucy/trunk/perl/Build.PL
    incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm
    incubator/lucy/trunk/perl/lib/Lucy.pm
    incubator/lucy/trunk/perl/lib/Lucy/Analysis/Stemmer.pm

Modified: incubator/lucy/trunk/LICENSE
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/LICENSE?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/LICENSE (original)
+++ incubator/lucy/trunk/LICENSE Wed Nov 10 16:02:40 2010
@@ -298,4 +298,34 @@ the license for those materials:
     registered in some jurisdictions. All other trademarks and registered
     trademarks mentioned herein are the property of their respective owners.
 
+Portions of the Snowball stemming library are bundled with this distribution
+under modules/analysis/snowstem.  Here is the license for those materials:
+
+    Copyright (c) 2001, Dr Martin Porter
+    Copyright (c) 2002, Richard Boulton
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+        * Redistributions of source code must retain the above copyright notice,
+          this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above copyright
+          notice, this list of conditions and the following disclaimer in the
+          documentation and/or other materials provided with the distribution.
+        * Neither the name of the copyright holders nor the names of its contributors
+          may be used to endorse or promote products derived from this software
+          without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 

Modified: incubator/lucy/trunk/NOTICE
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/NOTICE?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/NOTICE (original)
+++ incubator/lucy/trunk/NOTICE Wed Nov 10 16:02:40 2010
@@ -10,3 +10,7 @@ International Business Machines Corporat
 This software contains code derived from Unicode data available from
 <http://www.unicode.org/Public/> and Copyright 1991-2010 Unicode, Inc.
 
+This software bundles code developed by the Snowball project at
+<http://snowball.tartarus.org>, Copyright (c) 2001, Dr Martin Porter and
+Copyright (c) 2002, Richard Boulton.
+

Modified: incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.c Wed Nov 10 16:02:40 2010
@@ -23,10 +23,7 @@
 #include "Lucy/Analysis/Token.h"
 #include "Lucy/Analysis/Inversion.h"
 
-Stemmer_sb_stemmer_new_t    Stemmer_sb_stemmer_new    = NULL;
-Stemmer_sb_stemmer_delete_t Stemmer_sb_stemmer_delete = NULL;
-Stemmer_sb_stemmer_stem_t   Stemmer_sb_stemmer_stem   = NULL;
-Stemmer_sb_stemmer_length_t Stemmer_sb_stemmer_length = NULL;
+#include "libstemmer.h"
 
 Stemmer*
 Stemmer_new(const CharBuf *language)
@@ -43,11 +40,10 @@ Stemmer_init(Stemmer *self, const CharBu
     self->language = CB_Clone(language);
 
     // Get a Snowball stemmer.  Be case-insensitive. 
-    Stemmer_load_snowball();
     lang_buf[0] = tolower(CB_Code_Point_At(language, 0));
     lang_buf[1] = tolower(CB_Code_Point_At(language, 1));
     lang_buf[2] = '\0';
-    self->snowstemmer = lucy_Stemmer_sb_stemmer_new(lang_buf, "UTF_8");
+    self->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
     if (!self->snowstemmer) 
         THROW(ERR, "Can't find a Snowball stemmer for %o", language);
 
@@ -58,7 +54,7 @@ void
 Stemmer_destroy(Stemmer *self)
 {
     if (self->snowstemmer) {
-        lucy_Stemmer_sb_stemmer_delete((struct sb_stemmer*)self->snowstemmer);
+        sb_stemmer_delete((struct sb_stemmer*)self->snowstemmer);
     }
     DECREF(self->language);
     SUPER_DESTROY(self, STEMMER);
@@ -72,9 +68,9 @@ Stemmer_transform(Stemmer *self, Inversi
         = (struct sb_stemmer*)self->snowstemmer;
 
     while (NULL != (token = Inversion_Next(inversion))) {
-        sb_symbol *stemmed_text = lucy_Stemmer_sb_stemmer_stem(snowstemmer, 
+        const sb_symbol *stemmed_text = sb_stemmer_stem(snowstemmer, 
             (sb_symbol*)token->text, token->len);
-        size_t len = lucy_Stemmer_sb_stemmer_length(snowstemmer);
+        size_t len = sb_stemmer_length(snowstemmer);
         if (len > token->len) {
             FREEMEM(token->text);
             token->text = (char*)MALLOCATE(len + 1);

Modified: incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Analysis/Stemmer.cfh Wed Nov 10 16:02:40 2010
@@ -16,35 +16,6 @@
 
 parcel Lucy;
 
-__C__
-typedef unsigned char sb_symbol;
-struct sb_stemmer;
-
-typedef struct sb_stemmer*
-(*lucy_Stemmer_sb_stemmer_new_t)(const char *algorithm, const char *encoding);
-typedef void
-(*lucy_Stemmer_sb_stemmer_delete_t)(struct sb_stemmer *snowstemmer);
-typedef sb_symbol*
-(*lucy_Stemmer_sb_stemmer_stem_t)(struct sb_stemmer *snowstemmer, 
-    const sb_symbol *text, int len);
-typedef int
-(*lucy_Stemmer_sb_stemmer_length_t)(struct sb_stemmer *snowstemmer);
-extern lucy_Stemmer_sb_stemmer_new_t    lucy_Stemmer_sb_stemmer_new;
-extern lucy_Stemmer_sb_stemmer_delete_t lucy_Stemmer_sb_stemmer_delete;
-extern lucy_Stemmer_sb_stemmer_stem_t   lucy_Stemmer_sb_stemmer_stem;
-extern lucy_Stemmer_sb_stemmer_length_t lucy_Stemmer_sb_stemmer_length;
-#ifdef LUCY_USE_SHORT_NAMES
-  #define Stemmer_sb_stemmer_new_t    lucy_Stemmer_sb_stemmer_new_t
-  #define Stemmer_sb_stemmer_delete_t lucy_Stemmer_sb_stemmer_delete_t
-  #define Stemmer_sb_stemmer_stem_t   lucy_Stemmer_sb_stemmer_stem_t
-  #define Stemmer_sb_stemmer_length_t lucy_Stemmer_sb_stemmer_length_t
-  #define Stemmer_sb_stemmer_new      lucy_Stemmer_sb_stemmer_new
-  #define Stemmer_sb_stemmer_delete   lucy_Stemmer_sb_stemmer_delete
-  #define Stemmer_sb_stemmer_stem     lucy_Stemmer_sb_stemmer_stem
-  #define Stemmer_sb_stemmer_length   lucy_Stemmer_sb_stemmer_length
-#endif
-__END_C__
-
 /** Reduce related words to a shared root.
  * 
  * Stemmer is an L<Analyzer|Lucy::Analysis::Analyzer> which reduces
@@ -82,11 +53,6 @@ class Lucy::Analysis::Stemmer inherits L
     public bool_t
     Equals(Stemmer *self, Obj *other);
 
-    /** Load the Snowball stemming library.  Called by the constructor.
-     */
-    inert void
-    load_snowball();
-
     public void
     Destroy(Stemmer *self);
 }

Modified: incubator/lucy/trunk/devel/conf/lucyperl.supp
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/devel/conf/lucyperl.supp?rev=1033549&r1=1033548&r2=1033549&view=diff
==============================================================================
--- incubator/lucy/trunk/devel/conf/lucyperl.supp (original)
+++ incubator/lucy/trunk/devel/conf/lucyperl.supp Wed Nov 10 16:02:40 2010
@@ -101,13 +101,4 @@
    fun:*
 }
 
-{
-   <Snowball XS constructor>
-   Memcheck:Leak
-   fun:calloc
-   fun:Perl_safesyscalloc
-   fun:XS_Lingua__Stem__Snowball__Stemmifier_new
-   fun:*
-}
-
 

Added: incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/devel/update_snowstem.pl Wed Nov 10 16:02:40 2010
@@ -0,0 +1,97 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+use File::Spec::Functions qw( catfile catdir no_upwards );
+use File::Copy qw( copy );
+use Cwd qw( getcwd );
+
+# Refresh the Snowball stemmer sources bundled with Lucy: bring a Snowball
+# svn checkout to a pinned revision, build its libstemmer_c distribution,
+# copy the UTF-8 stemmer files into LUCY_SNOWBALL_DIR/source, and wrap the
+# copied libstemmer.h in an include guard.
+
+if ( @ARGV != 2 ) {
+    die "Usage: perl update_snowstem.pl SNOWBALL_SVN_CO LUCY_SNOWBALL_DIR";
+}
+
+my ( $snow_co_dir, $dest_dir ) = @ARGV;
+die("Not a directory: '$snow_co_dir'") unless -d $snow_co_dir;
+# Fail before running svn and make rather than at the first copy.
+die("Not a directory: '$dest_dir'") unless -d $dest_dir;
+
+# system() returns the raw wait status.  Compare the whole value with zero so
+# that a child killed by a signal (whose exit byte is 0) counts as a failure
+# too, not only a child that exits with a non-zero code.
+my $retval = system( "svn", "update", "-r", "541", $snow_co_dir );
+die "svn update failed" if $retval != 0;
+
+my $oldpwd = getcwd();
+my $snow_build_dir = catdir( $snow_co_dir, 'snowball' );
+chdir($snow_build_dir) or die $!;
+$retval = system("make dist_libstemmer_c");
+die "'make dist_libstemmer_c' failed" if $retval != 0;
+chdir($oldpwd) or die $!;
+
+# Copy only UTF-8 Stemmer files.  Keep directory structure intact so that
+# compilation succeeds.
+copy_dir_contents( 'src_c', qr/UTF/ );
+copy_dir_contents('include');
+copy_dir_contents('runtime');
+# Escape the dot so that only real "utf8.c" / "utf8.h" suffixes match.
+copy_dir_contents( 'libstemmer', qr/utf8\.[ch]$/ );
+
+# Add include guard to libstemmer.h.
+my $libstemmer_h_path
+    = catfile( $dest_dir, qw( source include libstemmer.h ) );
+open( my $libstemmer_h_fh, '<', $libstemmer_h_path )
+    or die "Can't open '$libstemmer_h_path': $!";
+my $libstemmer_h_content = do { local $/; <$libstemmer_h_fh> };
+close $libstemmer_h_fh or die $!;
+open( $libstemmer_h_fh, '>', $libstemmer_h_path )
+    or die "Can't open '$libstemmer_h_path': $!";
+print $libstemmer_h_fh <<END_STUFF;
+#ifndef H_LIBSTEMMER
+#define H_LIBSTEMMER
+
+$libstemmer_h_content
+
+#endif /* H_LIBSTEMMER */
+
+END_STUFF
+# Buffered write errors only surface at close, so check it.
+close $libstemmer_h_fh or die "Can't close '$libstemmer_h_path': $!";
+
+# Copy each entry of $snow_build_dir/$dir_name into the existing directory
+# $dest_dir/source/$dir_name.  When $pattern is given, only file names
+# matching it are copied; names containing ".svn" are always skipped.
+sub copy_dir_contents {
+    my ( $dir_name, $pattern ) = @_;
+    my $from_dir = catdir( $snow_build_dir, $dir_name );
+    my $to_dir = catdir( $dest_dir, 'source', $dir_name );
+    opendir( my $dh, $from_dir )
+        or die "Can't opendir '$from_dir': $!";
+    die "Not a directory: '$to_dir'" unless -d $to_dir;
+    for my $file ( no_upwards( readdir $dh ) ) {
+        next if $pattern && $file !~ $pattern;
+        next if $file =~ /\.svn/;
+        my $from = catfile( $from_dir, $file );
+        my $to   = catfile( $to_dir,   $file );
+        copy( $from, $to ) or die "Can't copy '$from' to '$to': $!";
+    }
+    closedir $dh or die $!;
+}
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/include/libstemmer.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,86 @@
+#ifndef H_LIBSTEMMER
+#define H_LIBSTEMMER
+
+
+/* Make header file work when included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sb_stemmer;
+typedef unsigned char sb_symbol;
+
+/* FIXME - should be able to get a version number for each stemming
+ * algorithm (which will be incremented each time the output changes). */
+
+/** Returns an array of the names of the available stemming algorithms.
+ *  Note that these are the canonical names - aliases (ie, other names for
+ *  the same algorithm) will not be included in the list.
+ *  The list is terminated with a null pointer.
+ *
+ *  The list must not be modified in any way.
+ */
+const char ** sb_stemmer_list(void);
+
+/** Create a new stemmer object, using the specified algorithm, for the
+ *  specified character encoding.
+ *
+ *  All algorithms will usually be available in UTF-8, but may also be
+ *  available in other character encodings.
+ *
+ *  @param algorithm The algorithm name.  This is either the english
+ *  name of the algorithm, or the 2 or 3 letter ISO 639 codes for the
+ *  language.  Note that case is significant in this parameter - the
+ *  value should be supplied in lower case.
+ *
+ *  @param charenc The character encoding.  NULL may be passed as
+ *  this value, in which case UTF-8 encoding will be assumed. Otherwise,
+ *  the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1),
+ *  "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian).  Note that
+ *  case is significant in this parameter.
+ *
+ *  @return NULL if the specified algorithm is not recognised, or the
+ *  algorithm is not available for the requested encoding.  Otherwise,
+ *  returns a pointer to a newly created stemmer for the requested algorithm.
+ *  The returned pointer must be deleted by calling sb_stemmer_delete().
+ *
+ *  @note NULL will also be returned if an out of memory error occurs.
+ */
+struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);
+
+/** Delete a stemmer object.
+ *
+ *  This frees all resources allocated for the stemmer.  After calling
+ *  this function, the supplied stemmer may no longer be used in any way.
+ *
+ *  It is safe to pass a null pointer to this function - this will have
+ *  no effect.
+ */
+void                sb_stemmer_delete(struct sb_stemmer * stemmer);
+
+/** Stem a word.
+ *
+ *  The return value is owned by the stemmer - it must not be freed or
+ *  modified, and it will become invalid when the stemmer is called again,
+ *  or if the stemmer is freed.
+ *
+ *  The length of the return value can be obtained using sb_stemmer_length().
+ *
+ *  If an out-of-memory error occurs, this will return NULL.
+ */
+const sb_symbol *   sb_stemmer_stem(struct sb_stemmer * stemmer,
+				    const sb_symbol * word, int size);
+
+/** Get the length of the result of the last stemmed word.
+ *  This should not be called before sb_stemmer_stem() has been called.
+ */
+int                 sb_stemmer_length(struct sb_stemmer * stemmer);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* H_LIBSTEMMER */
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/libstemmer_utf8.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,95 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include "../include/libstemmer.h"
+#include "../runtime/api.h"
+#include "modules_utf8.h"
+
+struct sb_stemmer {   /* one stemmer: a module's three hooks plus its env */
+    struct SN_env * (*create)(void);   /* copied from the module table entry */
+    void (*close)(struct SN_env *);    /* called on env by sb_stemmer_delete() */
+    int (*stem)(struct SN_env *);      /* called on env by sb_stemmer_stem() */
+
+    struct SN_env * env;   /* result of create(); stem output is read from it */
+};
+
+extern const char **
+sb_stemmer_list(void)
+{
+    return algorithm_names; /* static table from modules_utf8.h, ends with 0 */
+}
+
+/* Translate an encoding name into its stemmer_encoding_t value. */
+static stemmer_encoding_t
+sb_getenc(const char * charenc)
+{
+    struct stemmer_encoding * entry = encodings;
+    if (charenc == NULL) return ENC_UTF_8;  /* no name given: assume UTF-8 */
+    /* Advance until a name matches or the nameless sentinel entry is hit. */
+    while (entry->name != NULL && strcmp(entry->name, charenc) != 0) entry++;
+    if (entry->name != NULL) return entry->enc;
+    return ENC_UNKNOWN;
+}
+
+/* Create a stemmer for `algorithm` in encoding `charenc` (NULL means UTF-8).
+ * Returns NULL if algorithm is NULL or unknown, if the encoding is unknown,
+ * or if memory runs out.  Release the result with sb_stemmer_delete(). */
+extern struct sb_stemmer *
+sb_stemmer_new(const char * algorithm, const char * charenc)
+{
+    stemmer_encoding_t enc;
+    struct stemmer_modules * module;
+    struct sb_stemmer * stemmer;
+
+    /* strcmp() on a NULL name is undefined, so reject it up front. */
+    if (algorithm == NULL) return NULL;
+    enc = sb_getenc(charenc);
+    if (enc == ENC_UNKNOWN) return NULL;
+
+    for (module = modules; module->name != NULL; module++) {
+	if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
+    }
+    if (module->name == NULL) return NULL;
+
+    stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
+    if (stemmer == NULL) return NULL;
+    stemmer->create = module->create;
+    stemmer->close = module->close;
+    stemmer->stem = module->stem;
+
+    stemmer->env = stemmer->create();
+    if (stemmer->env != NULL) return stemmer;
+    sb_stemmer_delete(stemmer);   /* create() failed */
+    return NULL;
+}
+
+/* Close the env and free the stemmer; no-op for NULL or an unset close hook. */
+void
+sb_stemmer_delete(struct sb_stemmer * stemmer)
+{
+    if (stemmer == NULL || stemmer->close == NULL) return;
+    stemmer->close(stemmer->env);
+    stemmer->close = NULL;
+    free(stemmer);
+}
+
+/* Stem `word` (`size` symbols); the 0-terminated result stays owned by the
+ * stemmer.  NULL means SN_set_current() or the stem routine reported failure. */
+const sb_symbol *
+sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
+{
+    struct SN_env * env = stemmer->env;
+    if (SN_set_current(env, size, (const symbol *)(word)) != 0) {
+        env->l = 0;   /* so sb_stemmer_length() reports 0 after a failed load */
+        return NULL;
+    }
+    if (stemmer->stem(env) < 0) return NULL;
+    env->p[env->l] = 0;
+    return (const sb_symbol *)(env->p);
+}
+
+int
+sb_stemmer_length(struct sb_stemmer * stemmer)
+{
+    return stemmer->env->l; /* index where sb_stemmer_stem() wrote the final 0 */
+}

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/libstemmer/modules_utf8.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,121 @@
+/* libstemmer/modules_utf8.h: List of stemming modules.
+ *
+ * This file is generated by mkmodules.pl from a list of module names.
+ * Do not edit manually.
+ *
+ * Modules included by this file are: danish, dutch, english, finnish, french,
+ * german, hungarian, italian, norwegian, porter, portuguese, romanian,
+ * russian, spanish, swedish, turkish
+ */
+
+#include "../src_c/stem_UTF_8_danish.h"
+#include "../src_c/stem_UTF_8_dutch.h"
+#include "../src_c/stem_UTF_8_english.h"
+#include "../src_c/stem_UTF_8_finnish.h"
+#include "../src_c/stem_UTF_8_french.h"
+#include "../src_c/stem_UTF_8_german.h"
+#include "../src_c/stem_UTF_8_hungarian.h"
+#include "../src_c/stem_UTF_8_italian.h"
+#include "../src_c/stem_UTF_8_norwegian.h"
+#include "../src_c/stem_UTF_8_porter.h"
+#include "../src_c/stem_UTF_8_portuguese.h"
+#include "../src_c/stem_UTF_8_romanian.h"
+#include "../src_c/stem_UTF_8_russian.h"
+#include "../src_c/stem_UTF_8_spanish.h"
+#include "../src_c/stem_UTF_8_swedish.h"
+#include "../src_c/stem_UTF_8_turkish.h"
+
+typedef enum {
+  ENC_UNKNOWN=0,
+  ENC_UTF_8
+} stemmer_encoding_t;
+
+struct stemmer_encoding {
+  const char * name;
+  stemmer_encoding_t enc;
+};
+static struct stemmer_encoding encodings[] = {
+  {"UTF_8", ENC_UTF_8},
+  {0,ENC_UNKNOWN}
+};
+
+struct stemmer_modules {
+  const char * name;
+  stemmer_encoding_t enc; 
+  struct SN_env * (*create)(void);
+  void (*close)(struct SN_env *);
+  int (*stem)(struct SN_env *);
+};
+static struct stemmer_modules modules[] = {
+  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
+  {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {0,ENC_UNKNOWN,0,0,0}
+};
+static const char * algorithm_names[] = {
+  "danish", 
+  "dutch", 
+  "english", 
+  "finnish", 
+  "french", 
+  "german", 
+  "hungarian", 
+  "italian", 
+  "norwegian", 
+  "porter", 
+  "portuguese", 
+  "romanian", 
+  "russian", 
+  "spanish", 
+  "swedish", 
+  "turkish", 
+  0
+};

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,66 @@
+
+#include <stdlib.h> /* for calloc, free */
+#include "header.h"
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{   /* Build an env with S_size strings, I_size ints and B_size bytes. */
+    struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); /* zero-filled */
+    if (z == NULL) return NULL;
+    z->p = create_s();
+    if (z->p == NULL) goto error;
+    if (S_size)
+    {
+        int i;
+        z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (z->S == NULL) goto error;
+
+        for (i = 0; i < S_size; i++)
+        {
+            z->S[i] = create_s();
+            if (z->S[i] == NULL) goto error; /* later slots keep calloc's zero fill */
+        }
+    }
+
+    if (I_size)
+    {
+        z->I = (int *) calloc(I_size, sizeof(int));
+        if (z->I == NULL) goto error;
+    }
+
+    if (B_size)
+    {
+        z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
+        if (z->B == NULL) goto error;
+    }
+
+    return z;
+error: /* hand the partially built env to SN_close_env(), then report NULL */
+    SN_close_env(z, S_size);
+    return NULL;
+}
+
+/* Free z and all it owns.  SN_create_env()'s error path can pass an env whose
+ * S array is still NULL while S_size is non-zero, hence the test on z->S. */
+extern void SN_close_env(struct SN_env * z, int S_size)
+{
+    if (z == NULL) return;
+    if (z->S != NULL)
+    {
+        int i;
+        /* NOTE(review): unfilled slots are NULL; assumes lose_s(NULL) is safe. */
+        for (i = 0; i < S_size; i++) lose_s(z->S[i]);
+        free(z->S);
+    }
+    free(z->I);
+    free(z->B);
+    if (z->p) lose_s(z->p);
+    free(z);
+}
+
+extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
+{   /* Load the `size` symbols at s into z; returns replace_s()'s status. */
+    int err = replace_s(z, 0, z->l, size, s, NULL); /* presumably swaps [0, z->l) for s - confirm in replace_s */
+    z->c = 0; /* reset even when replace_s() reported an error */
+    return err;
+}
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/api.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,26 @@
+
+/* Storage unit for every string handled by the runtime. */
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+/* Working state of one stemmer instance; created by SN_create_env() and
+   released by SN_close_env(). */
+struct SN_env {
+    /* the string being processed (header-prefixed buffer from create_s) */
+    symbol * p;
+    /* c: cursor; l: forward limit; lb: backward limit;
+       bra / ket: start and end of the current slice */
+    int c; int l; int lb; int bra; int ket;
+    /* string slots, each allocated with create_s() */
+    symbol * * S;
+    /* int slots */
+    int * I;
+    /* byte slots */
+    unsigned char * B;
+};
+
+/* Allocate / free an environment.  The S_size given to SN_close_env is used
+   as the number of entries in z->S to release. */
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z, int S_size);
+
+/* Load the size symbols at s as the current string and reset the cursor to 0.
+   Returns the status of the underlying replace_s call. */
+extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/header.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,58 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+/* Aliases for the int range limits from <limits.h>. */
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+/* Bytes reserved in front of every Snowball string for its two int header
+   fields.  Parenthesised so the macro expands safely inside any expression
+   (for example x / HEAD or x % HEAD). */
+#define HEAD (2*sizeof(int))
+
+/* Accessors for the header stored immediately before p: the int at index -1
+   is the current length and the int at index -2 the allocated capacity.
+   SIZE and CAPACITY remain usable as lvalues. */
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+/* One entry of a lookup table searched by find_among() / find_among_b().
+   When function is non-null it is called after the string has matched, and
+   result is only returned if the call yields non-zero. */
+struct among
+{   int s_size;     /* number of chars in string */
+    const symbol * s;       /* search string */
+    int substring_i;/* index to longest matching substring */
+    int result;     /* result of the lookup */
+    int (* function)(struct SN_env *);
+};
+
+/* String allocation: create_s returns NULL on allocation failure;
+   lose_s accepts NULL. */
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+/* Move n UTF-8 characters from position c (backwards if n is negative),
+   staying within [lb, l]; returns the new position or -1. */
+extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
+
+/* Grouping tests on UTF-8 data.  s is a bitmap covering code points
+   min..max; the _b variants look at the character before the cursor. */
+extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+
+/* The same grouping tests, one symbol per character. */
+extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+
+/* Literal / string-variable comparison at the cursor; return 1 and move the
+   cursor on a match, 0 otherwise. */
+extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
+extern int eq_v(struct SN_env * z, const symbol * p);
+extern int eq_v_b(struct SN_env * z, const symbol * p);
+
+/* Table lookup at the cursor; return the matching entry's result, or 0. */
+extern int find_among(struct SN_env * z, const struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
+
+/* Editing of z->p; each returns 0 on success and -1 on error. */
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
+extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
+extern int slice_from_v(struct SN_env * z, const symbol * p);
+extern int slice_del(struct SN_env * z);
+
+extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
+extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
+
+/* Copy the slice / the whole string into p, growing p if needed; return the
+   (possibly moved) buffer, or NULL on failure. */
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+/* NOTE(review): the only definition of debug() in utilities.c sits inside
+   an "#if 0" section, so this symbol is declared but not normally linked. */
+extern void debug(struct SN_env * z, int number, int line_count);
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/runtime/utilities.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,478 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+/* "unless (C) stmt" executes stmt when C is false. */
+#define unless(C) if(!(C))
+
+/* Capacity, in symbols, that create_s() gives a new string. */
+#define CREATE_SIZE 1
+
+/* Allocate a fresh, empty Snowball string.
+ *
+ * The allocation holds a HEAD-byte header followed by the symbols; the
+ * returned pointer addresses the first symbol, and the header fields are
+ * reached through CAPACITY() and SIZE().  Returns NULL if malloc fails.
+ * Release the string with lose_s().
+ */
+extern symbol * create_s(void) {
+    symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);
+    CAPACITY(p) = CREATE_SIZE;
+    /* Nothing has been stored yet, so the logical length is 0.  Reporting
+       CREATE_SIZE here would expose an uninitialised symbol to readers such
+       as eq_v() and leave SIZE(z->p) one larger than z->l. */
+    SET_SIZE(p, 0);
+    return p;
+}
+
+extern void lose_s(symbol * p) {
+    /* The allocation starts HEAD bytes before the first symbol. */
+    if (p != NULL) {
+        char * base = (char *) p - HEAD;
+        free(base);
+    }
+}
+
+/*
+   new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
+   if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
+   position, or -1 on failure.
+
+   -- used to implement hop and next in the utf8 case.
+*/
+
+extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
+    /* Forward: step over one lead byte and any continuation bytes
+       (0x80..0xBF) that follow it, n times. */
+    while (n > 0) {
+        if (c >= l) return -1;
+        if (p[c++] >= 0xC0) {   /* 1100 0000 */
+            while (c < l && p[c] >= 0x80 && p[c] < 0xC0) c++;
+        }
+        n--;
+    }
+    /* Backward: step back one byte, then keep going until a lead byte
+       (>= 0xC0) or the backward limit is reached, -n times. */
+    while (n < 0) {
+        if (c <= lb) return -1;
+        if (p[--c] >= 0x80) {   /* 1000 0000 */
+            while (c > lb && p[c] < 0xC0) c--;
+        }
+        n++;
+    }
+    return c;
+}
+
+/* Code for character groupings: utf8 cases */
+
+/* Decode the UTF-8 character that starts at p[c], reading no further than
+ * p[l - 1].
+ *
+ * Stores the code point in *slot and returns the number of bytes consumed
+ * (1 to 4), or 0 if c is already at the limit.  A sequence cut short by the
+ * limit is decoded from the bytes that are available.  Four-byte sequences
+ * (lead byte >= 0xF0) are consumed as a single character so the cursor is
+ * never left in the middle of one.
+ */
+static int get_utf8(const symbol * p, int c, int l, int * slot) {
+    int b0, b1, b2;
+    if (c >= l) return 0;
+    b0 = p[c++];
+    if (b0 < 0xC0 || c == l) {   /* 1100 0000 */
+        * slot = b0; return 1;
+    }
+    b1 = p[c++] & 0x3F;
+    if (b0 < 0xE0 || c == l) {   /* 1110 0000 */
+        * slot = (b0 & 0x1F) << 6 | b1; return 2;
+    }
+    b2 = p[c++] & 0x3F;
+    if (b0 < 0xF0 || c == l) {   /* 1111 0000 */
+        * slot = (b0 & 0xF) << 12 | b1 << 6 | b2; return 3;
+    }
+    * slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F); return 4;
+}
+
+/* Decode the UTF-8 character that ends just before p[c], reading no
+ * further back than p[lb].
+ *
+ * Stores the code point in *slot and returns the number of bytes consumed
+ * (1 to 4), or 0 if c is already at the backward limit.  A sequence cut
+ * short by the limit is decoded from the bytes that are available.
+ *
+ * The lead byte of a three-byte sequence is fetched with its own p[--c];
+ * re-reading p[c] at that point would pick up the middle byte again and
+ * yield the wrong code point.
+ */
+static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
+    int a, b;
+    if (c <= lb) return 0;
+    b = p[--c];
+    if (b < 0x80 || c == lb) {   /* 1000 0000 */
+        * slot = b; return 1;
+    }
+    a = b & 0x3F;                 /* low six bits, from the last byte */
+    b = p[--c];
+    if (b >= 0xC0 || c == lb) {   /* 1100 0000 */
+        * slot = (b & 0x1F) << 6 | a; return 2;
+    }
+    a |= (b & 0x3F) << 6;
+    b = p[--c];
+    if (b >= 0xE0 || c == lb) {   /* 1110 0000 */
+        * slot = (b & 0xF) << 12 | a; return 3;
+    }
+    a |= (b & 0x3F) << 12;
+    * slot = (p[--c] & 0x7) << 18 | a; return 4;
+}
+
+/* Advance over UTF-8 characters that are members of the bitmap s (code
+   points min..max).  Returns 0 after a member has been consumed and repeat
+   is zero, the byte width of the first non-member (cursor left in front of
+   it), or -1 at the limit. */
+extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch, bit;
+        int width = get_utf8(z->p, z->c, z->l, & ch);
+        if (width == 0) return -1;
+        if (ch > max || ch < min) return width;
+        bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return width;
+        z->c += width;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Backward counterpart of in_grouping_U: examines the character before the
+   cursor and moves the cursor towards z->lb. */
+extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch, bit;
+        int width = get_b_utf8(z->p, z->c, z->lb, & ch);
+        if (width == 0) return -1;
+        if (ch > max || ch < min) return width;
+        bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return width;
+        z->c -= width;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Advance over UTF-8 characters that are NOT members of the bitmap s.
+   Returns 0 after a non-member has been consumed and repeat is zero, the
+   byte width of the first member (cursor left in front of it), or -1 at
+   the limit. */
+extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        int width = get_utf8(z->p, z->c, z->l, & ch);
+        if (width == 0) return -1;
+        if (ch <= max && ch >= min) {
+            int bit = ch - min;
+            if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return width;
+        }
+        z->c += width;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Backward counterpart of out_grouping_U: examines the character before
+   the cursor and moves the cursor towards z->lb. */
+extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        int width = get_b_utf8(z->p, z->c, z->lb, & ch);
+        if (width == 0) return -1;
+        if (ch <= max && ch >= min) {
+            int bit = ch - min;
+            if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return width;
+        }
+        z->c -= width;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Code for character groupings: non-utf8 cases */
+
+/* Advance over single symbols that are members of the bitmap s (values
+   min..max).  Returns 0 after a member has been consumed and repeat is
+   zero, 1 at the first non-member (cursor unchanged), or -1 at the limit. */
+extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch, bit;
+        if (z->c >= z->l) return -1;
+        ch = z->p[z->c];
+        if (ch > max || ch < min) return 1;
+        bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 1;
+        z->c++;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Backward counterpart of in_grouping: examines the symbol before the
+   cursor and moves the cursor towards z->lb. */
+extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch, bit;
+        if (z->c <= z->lb) return -1;
+        ch = z->p[z->c - 1];
+        if (ch > max || ch < min) return 1;
+        bit = ch - min;
+        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) == 0) return 1;
+        z->c--;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Advance over single symbols that are NOT members of the bitmap s.
+   Returns 0 after a non-member has been consumed and repeat is zero, 1 at
+   the first member (cursor unchanged), or -1 at the limit. */
+extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        if (z->c >= z->l) return -1;
+        ch = z->p[z->c];
+        if (ch <= max && ch >= min) {
+            int bit = ch - min;
+            if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 1;
+        }
+        z->c++;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Backward counterpart of out_grouping: examines the symbol before the
+   cursor and moves the cursor towards z->lb. */
+extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    for (;;) {
+        int ch;
+        if (z->c <= z->lb) return -1;
+        ch = z->p[z->c - 1];
+        if (ch <= max && ch >= min) {
+            int bit = ch - min;
+            if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 1;
+        }
+        z->c--;
+        if (!repeat) break;
+    }
+    return 0;
+}
+
+/* Forward literal match: if the s_size symbols at the cursor equal s, step
+   the cursor over them and return 1; otherwise return 0. */
+extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
+    const int remaining = z->l - z->c;
+    if (remaining < s_size) return 0;
+    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c += s_size;
+    return 1;
+}
+
+/* Backward literal match: if the s_size symbols ending at the cursor equal
+   s, step the cursor back over them and return 1; otherwise return 0. */
+extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
+    const int available = z->c - z->lb;
+    if (available < s_size) return 0;
+    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
+    z->c -= s_size;
+    return 1;
+}
+
+/* eq_s for a Snowball string variable: its length comes from its header. */
+extern int eq_v(struct SN_env * z, const symbol * p) {
+    const int len = SIZE(p);
+    return eq_s(z, len, p);
+}
+
+/* eq_s_b for a Snowball string variable: its length comes from its header. */
+extern int eq_v_b(struct SN_env * z, const symbol * p) {
+    const int len = SIZE(p);
+    return eq_s_b(z, len, p);
+}
+
+/* Look up the text at the cursor in the table v (v_size entries).
+ *
+ * The first loop is a binary search over v — this presumes the entries are
+ * sorted; confirm against the table generator.  common_i / common_j record
+ * how many leading symbols are already known to match at the lower and
+ * upper bound, so each probe resumes comparing from the smaller of the two.
+ *
+ * The second loop starts from the entry the search settled on and follows
+ * substring_i links.  For the first entry that is fully matched the cursor
+ * is placed just after the match; if the entry has a function it is called
+ * and the entry only counts when that call returns non-zero.
+ *
+ * Returns the result field of the accepted entry, or 0 if the chain ends
+ * (substring_i < 0) without one.
+ */
+extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
+
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    const struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1) {
+        int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {
+            /* running out of input counts as "less than" the entry */
+            int i2; for (i2 = common; i2 < w->s_size; i2++) {
+                if (c + common == l) { diff = -1; break; }
+                diff = q[common] - w->s[i2];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1) {
+            if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1) {
+        w = v + i;
+        if (common_i >= w->s_size) {
+            z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {
+                int res = w->function(z);
+                /* the callback may have moved the cursor; put it back */
+                z->c = c + w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+/* Backward table lookup: compares the text ending at the cursor against
+ * the entries of v from their last symbol towards their first, stopping at
+ * z->lb.  On acceptance the cursor is moved to the start of the match
+ * (c - s_size).  Returns the accepted entry's result, or 0 if no entry on
+ * the substring_i chain is accepted.
+ */
+extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
+
+    int i = 0;
+    int j = v_size;
+
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    const struct among * w;
+
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1) {
+        int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {
+            /* i2 walks the entry from its end; common counts symbols matched */
+            int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
+                if (c - common == lb) { diff = -1; break; }
+                diff = q[- common] - w->s[i2];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1) {
+            if (i > 0) break;
+            if (j == i) break;
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1) {
+        w = v + i;
+        if (common_i >= w->s_size) {
+            z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {
+                int res = w->function(z);
+                /* the callback may have moved the cursor; put it back */
+                z->c = c - w->s_size;
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Increase the size of the buffer pointed to by p to at least n symbols.
+ * If insufficient memory, returns NULL and frees the old buffer.
+ */
+static symbol * increase_size(symbol * p, int n) {
+    /* Ask for 20 symbols of slack beyond the requested n. */
+    const int capacity = n + 20;
+    char * old_base = (char *) p - HEAD;
+    void * new_base = realloc(old_base, HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown;
+
+    if (new_base == NULL) {
+        /* realloc left the old block alone; release it as documented. */
+        lose_s(p);
+        return NULL;
+    }
+    grown = (symbol *) (HEAD + (char *)new_base);
+    CAPACITY(grown) = capacity;
+    return grown;
+}
+
+/* to replace symbols between c_bra and c_ket in z->p by the
+   s_size symbols at s.
+   Returns 0 on success, -1 on error.
+   Also, frees z->p (and sets it to NULL) on error.
+*/
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
+{
+    /* adjptr, when non-NULL, receives the change in string length. */
+    int adjustment;
+    int len;
+    if (z->p == NULL) {
+        /* a previous failure freed the buffer; start again from empty */
+        z->p = create_s();
+        if (z->p == NULL) return -1;
+    }
+    adjustment = s_size - (c_ket - c_bra);
+    len = SIZE(z->p);
+    if (adjustment != 0) {
+        if (adjustment + len > CAPACITY(z->p)) {
+            /* increase_size frees the old buffer on failure, so z->p is
+               either the grown buffer or NULL after this assignment */
+            z->p = increase_size(z->p, adjustment + len);
+            if (z->p == NULL) return -1;
+        }
+        /* shift the tail (everything from c_ket on) to its new position */
+        memmove(z->p + c_ket + adjustment,
+                z->p + c_ket,
+                (len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, adjustment + len);
+        z->l += adjustment;
+        /* keep the cursor on the same text: move it with the tail, or
+           pull it back to c_bra if it was inside the replaced range */
+        if (z->c >= c_ket)
+            z->c += adjustment;
+        else
+            if (z->c > c_bra)
+                z->c = c_bra;
+    }
+    unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    if (adjptr != NULL)
+        *adjptr = adjustment;
+    return 0;
+}
+
+/* Validate the slice markers: returns 0 when 0 <= bra <= ket <= l, the
+   buffer exists and l does not exceed its recorded size; -1 otherwise. */
+static int slice_check(struct SN_env * z) {
+    const int valid = z->bra >= 0 &&
+                      z->bra <= z->ket &&
+                      z->ket <= z->l &&
+                      z->p != NULL &&
+                      z->l <= SIZE(z->p); /* this test could be removed */
+    if (valid) return 0;
+#if 0
+    fprintf(stderr, "faulty slice operation:\n");
+    debug(z, -1, 0);
+#endif
+    return -1;
+}
+
+/* Replace the current slice [bra, ket) with the s_size symbols at s.
+   Returns -1 if the slice markers are invalid or the replacement fails. */
+extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
+    if (slice_check(z) != 0) {
+        return -1;
+    }
+    return replace_s(z, z->bra, z->ket, s_size, s, NULL);
+}
+
+/* slice_from_s for a Snowball string variable (length taken from its header). */
+extern int slice_from_v(struct SN_env * z, const symbol * p) {
+    const int len = SIZE(p);
+    return slice_from_s(z, len, p);
+}
+
+/* Delete the current slice by replacing it with an empty string. */
+extern int slice_del(struct SN_env * z) {
+    return slice_from_s(z, 0, NULL);
+}
+
+/* Replace [bra, ket) with the s_size symbols at s, then shift the slice
+   markers that lie at or after bra by the resulting change in length.
+   Returns 0 on success, -1 if the replacement fails. */
+extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
+    int delta;
+    if (replace_s(z, bra, ket, s_size, s, &delta) != 0) {
+        return -1;
+    }
+    if (z->bra >= bra) z->bra += delta;
+    if (z->ket >= bra) z->ket += delta;
+    return 0;
+}
+
+/* insert_s for a Snowball string variable (length taken from its header). */
+extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
+    return insert_s(z, bra, ket, SIZE(p), p);
+}
+
+/* Copy the current slice into the string variable p, growing p if needed.
+   Returns the (possibly relocated) variable.  On an invalid slice p is
+   freed and NULL returned; NULL is also returned if growing fails. */
+extern symbol * slice_to(struct SN_env * z, symbol * p) {
+    int n;
+    if (slice_check(z) != 0) {
+        lose_s(p);
+        return NULL;
+    }
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p)) {
+        p = increase_size(p, n);
+        if (p == NULL) return NULL;
+    }
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Copy the first z->l symbols of the current string into the string
+   variable p, growing p if needed.  Returns the (possibly relocated)
+   variable, or NULL if growing fails. */
+extern symbol * assign_to(struct SN_env * z, symbol * p) {
+    const int n = z->l;
+    if (n > CAPACITY(p)) {
+        p = increase_size(p, n);
+        if (p == NULL) return NULL;
+    }
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+/* Diagnostic dump of the current string, compiled out by default.
+   Prints the symbols of z->p with markers at the lb ({), bra ([), cursor (|),
+   ket (]) and l (}) positions; zero symbols are shown as '#'.  The header is
+   only printed when number is non-negative. */
+#if 0
+extern void debug(struct SN_env * z, int number, int line_count) {
+    int i;
+    int limit = SIZE(z->p);
+    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    for (i = 0; i <= limit; i++) {
+        if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}
+#endif

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,339 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "../runtime/header.h"
+
+/* Public entry point (C linkage), the file-local routines it is built
+   from, and the environment constructor / destructor (C linkage). */
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int danish_UTF_8_stem(struct SN_env * z);
+#ifdef __cplusplus
+}
+#endif
+static int r_undouble(struct SN_env * z);
+static int r_other_suffix(struct SN_env * z);
+static int r_consonant_pair(struct SN_env * z);
+static int r_main_suffix(struct SN_env * z);
+static int r_mark_regions(struct SN_env * z);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+extern struct SN_env * danish_UTF_8_create_env(void);
+extern void danish_UTF_8_close_env(struct SN_env * z);
+
+
+#ifdef __cplusplus
+}
+#endif
+/* Suffix strings for table a_0 (searched backwards by r_main_suffix). */
+static const symbol s_0_0[3] = { 'h', 'e', 'd' };
+static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
+static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
+static const symbol s_0_3[1] = { 'e' };
+static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
+static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
+static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
+static const symbol s_0_7[3] = { 'e', 'n', 'e' };
+static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
+static const symbol s_0_9[3] = { 'e', 'r', 'e' };
+static const symbol s_0_10[2] = { 'e', 'n' };
+static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
+static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
+static const symbol s_0_13[2] = { 'e', 'r' };
+static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
+static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
+static const symbol s_0_16[1] = { 's' };
+static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
+static const symbol s_0_18[2] = { 'e', 's' };
+static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
+static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
+static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
+static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
+static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
+static const symbol s_0_24[3] = { 'e', 'n', 's' };
+static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
+static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
+static const symbol s_0_27[3] = { 'e', 'r', 's' };
+static const symbol s_0_28[3] = { 'e', 't', 's' };
+static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
+static const symbol s_0_30[2] = { 'e', 't' };
+static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
+
+/* Each row is { s_size, s, substring_i, result, function }.  Result 2 is
+   used only for the bare "s" entry; every other suffix yields 1. */
+static const struct among a_0[32] =
+{
+/*  0 */ { 3, s_0_0, -1, 1, 0},
+/*  1 */ { 5, s_0_1, 0, 1, 0},
+/*  2 */ { 4, s_0_2, -1, 1, 0},
+/*  3 */ { 1, s_0_3, -1, 1, 0},
+/*  4 */ { 5, s_0_4, 3, 1, 0},
+/*  5 */ { 4, s_0_5, 3, 1, 0},
+/*  6 */ { 6, s_0_6, 5, 1, 0},
+/*  7 */ { 3, s_0_7, 3, 1, 0},
+/*  8 */ { 4, s_0_8, 3, 1, 0},
+/*  9 */ { 3, s_0_9, 3, 1, 0},
+/* 10 */ { 2, s_0_10, -1, 1, 0},
+/* 11 */ { 5, s_0_11, 10, 1, 0},
+/* 12 */ { 4, s_0_12, 10, 1, 0},
+/* 13 */ { 2, s_0_13, -1, 1, 0},
+/* 14 */ { 5, s_0_14, 13, 1, 0},
+/* 15 */ { 4, s_0_15, 13, 1, 0},
+/* 16 */ { 1, s_0_16, -1, 2, 0},
+/* 17 */ { 4, s_0_17, 16, 1, 0},
+/* 18 */ { 2, s_0_18, 16, 1, 0},
+/* 19 */ { 5, s_0_19, 18, 1, 0},
+/* 20 */ { 7, s_0_20, 19, 1, 0},
+/* 21 */ { 4, s_0_21, 18, 1, 0},
+/* 22 */ { 5, s_0_22, 18, 1, 0},
+/* 23 */ { 4, s_0_23, 18, 1, 0},
+/* 24 */ { 3, s_0_24, 16, 1, 0},
+/* 25 */ { 6, s_0_25, 24, 1, 0},
+/* 26 */ { 5, s_0_26, 24, 1, 0},
+/* 27 */ { 3, s_0_27, 16, 1, 0},
+/* 28 */ { 3, s_0_28, 16, 1, 0},
+/* 29 */ { 5, s_0_29, 28, 1, 0},
+/* 30 */ { 2, s_0_30, -1, 1, 0},
+/* 31 */ { 4, s_0_31, 30, 1, 0}
+};
+
+/* Two-letter endings looked up by r_consonant_pair.  All results are -1,
+   i.e. non-zero, so the caller only learns that one of them matched. */
+static const symbol s_1_0[2] = { 'g', 'd' };
+static const symbol s_1_1[2] = { 'd', 't' };
+static const symbol s_1_2[2] = { 'g', 't' };
+static const symbol s_1_3[2] = { 'k', 't' };
+
+static const struct among a_1[4] =
+{
+/*  0 */ { 2, s_1_0, -1, -1, 0},
+/*  1 */ { 2, s_1_1, -1, -1, 0},
+/*  2 */ { 2, s_1_2, -1, -1, 0},
+/*  3 */ { 2, s_1_3, -1, -1, 0}
+};
+
+/* Endings looked up by r_other_suffix.  0xC3 0xB8 is the UTF-8 encoding of
+   U+00F8.  Result 1 leads to deletion, result 2 to replacement by s_2. */
+static const symbol s_2_0[2] = { 'i', 'g' };
+static const symbol s_2_1[3] = { 'l', 'i', 'g' };
+static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
+static const symbol s_2_3[3] = { 'e', 'l', 's' };
+static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' };
+
+static const struct among a_2[5] =
+{
+/*  0 */ { 2, s_2_0, -1, 1, 0},
+/*  1 */ { 3, s_2_1, 0, 1, 0},
+/*  2 */ { 4, s_2_2, 1, 1, 0},
+/*  3 */ { 3, s_2_3, -1, 1, 0},
+/*  4 */ { 5, s_2_4, -1, 2, 0}
+};
+
+/* Grouping bitmaps: bit (ch - min) is set when code point ch belongs to the
+   group.  g_v is used with the range 97..248, g_s_ending with 97..229. */
+static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
+
+static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
+
+/* Literal strings referenced by r_other_suffix. */
+static const symbol s_0[] = { 's', 't' };
+static const symbol s_1[] = { 'i', 'g' };
+static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
+
+/* Compute the region marks I[0] and I[1].
+   I[1] is the position three characters after the cursor (the routine
+   gives up, returning 0, if the word is shorter).  I[0] starts as the end
+   of the string, then becomes the position just past the first character
+   outside g_v that follows the first character inside g_v; it is finally
+   raised to I[1] if it would otherwise be smaller.  Returns 1 on success,
+   0 if either scan runs off the end. */
+static int r_mark_regions(struct SN_env * z) {
+    z->I[0] = z->l;
+    {   int c_test = z->c; /* test, line 33 */
+        {   int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
+            if (ret < 0) return 0;
+            z->c = ret; /* hop, line 33 */
+        }
+        z->I[1] = z->c; /* setmark x, line 33 */
+        z->c = c_test;
+    }
+    if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
+    {    /* gopast */ /* non v, line 34 */
+        int ret = in_grouping_U(z, g_v, 97, 248, 1);
+        if (ret < 0) return 0;
+        z->c += ret;
+    }
+    z->I[0] = z->c; /* setmark p1, line 34 */
+     /* try, line 35 */
+    if (!(z->I[0] < z->I[1])) goto lab0;
+    z->I[0] = z->I[1];
+lab0:
+    return 1;
+}
+
+/* Backward step: with the backward limit temporarily set to I[0], look up
+   the ending before the cursor in a_0 and mark it as the slice.
+   Result 1: delete the slice.  Result 2: delete it only if the character
+   before it belongs to g_s_ending.
+   Returns 1 after handling a match, 0 if nothing applies, or a negative
+   value propagated from slice_del. */
+static int r_main_suffix(struct SN_env * z) {
+    int among_var;
+    {   int mlimit; /* setlimit, line 41 */
+        int m1 = z->l - z->c; (void)m1;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 41 */
+        mlimit = z->lb; z->lb = z->c;
+        z->c = z->l - m1;
+        z->ket = z->c; /* [, line 41 */
+        /* cheap pre-filter on the last byte before the full table search */
+        if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
+        among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
+        if (!(among_var)) { z->lb = mlimit; return 0; }
+        z->bra = z->c; /* ], line 41 */
+        z->lb = mlimit;
+    }
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int ret = slice_del(z); /* delete, line 48 */
+                if (ret < 0) return ret;
+            }
+            break;
+        case 2:
+            if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0;
+            {   int ret = slice_del(z); /* delete, line 50 */
+                if (ret < 0) return ret;
+            }
+            break;
+    }
+    return 1;
+}
+
+/* Backward step: as a test (cursor restored afterwards), check with the
+   backward limit temporarily set to I[0] that the text before the cursor
+   ends in one of the a_1 pairs.  If so, step back one character from the
+   cursor and delete that character (ket was set at the original cursor).
+   Returns 1 on deletion, 0 if the test fails, or a negative value
+   propagated from slice_del. */
+static int r_consonant_pair(struct SN_env * z) {
+    {   int m_test = z->l - z->c; /* test, line 55 */
+        {   int mlimit; /* setlimit, line 56 */
+            int m1 = z->l - z->c; (void)m1;
+            if (z->c < z->I[0]) return 0;
+            z->c = z->I[0]; /* tomark, line 56 */
+            mlimit = z->lb; z->lb = z->c;
+            z->c = z->l - m1;
+            z->ket = z->c; /* [, line 56 */
+            /* pre-filter: need at least two symbols and a final 'd' (100) or 't' (116) */
+            if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; }
+            if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */
+            z->bra = z->c; /* ], line 56 */
+            z->lb = mlimit;
+        }
+        z->c = z->l - m_test;
+    }
+    {   int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
+        if (ret < 0) return 0;
+        z->c = ret; /* next, line 62 */
+    }
+    z->bra = z->c; /* ], line 62 */
+    {   int ret = slice_del(z); /* delete, line 62 */
+        if (ret < 0) return ret;
+    }
+    return 1;
+}
+
+/* Backward step in two parts.
+   First, optionally: if the text before the cursor ends in "st" preceded by
+   "ig", delete the "ig"; the cursor is restored either way.
+   Second: with the backward limit temporarily set to I[0], look up the
+   ending in a_2.  Result 1 deletes it and then tries r_consonant_pair;
+   result 2 replaces it with s_2.
+   Returns 1 after handling a match, 0 if nothing applies, or a negative
+   value propagated from the editing routines. */
+static int r_other_suffix(struct SN_env * z) {
+    int among_var;
+    {   int m1 = z->l - z->c; (void)m1; /* do, line 66 */
+        z->ket = z->c; /* [, line 66 */
+        if (!(eq_s_b(z, 2, s_0))) goto lab0;
+        z->bra = z->c; /* ], line 66 */
+        if (!(eq_s_b(z, 2, s_1))) goto lab0;
+        {   int ret = slice_del(z); /* delete, line 66 */
+            if (ret < 0) return ret;
+        }
+    lab0:
+        z->c = z->l - m1;
+    }
+    {   int mlimit; /* setlimit, line 67 */
+        int m2 = z->l - z->c; (void)m2;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 67 */
+        mlimit = z->lb; z->lb = z->c;
+        z->c = z->l - m2;
+        z->ket = z->c; /* [, line 67 */
+        /* cheap pre-filter on the last byte before the full table search */
+        if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
+        among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
+        if (!(among_var)) { z->lb = mlimit; return 0; }
+        z->bra = z->c; /* ], line 67 */
+        z->lb = mlimit;
+    }
+    switch(among_var) {
+        case 0: return 0;
+        case 1:
+            {   int ret = slice_del(z); /* delete, line 70 */
+                if (ret < 0) return ret;
+            }
+            {   int m3 = z->l - z->c; (void)m3; /* do, line 70 */
+                {   int ret = r_consonant_pair(z);
+                    if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
+                    if (ret < 0) return ret;
+                }
+            lab1:
+                z->c = z->l - m3;
+            }
+            break;
+        case 2:
+            {   int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */
+                if (ret < 0) return ret;
+            }
+            break;
+    }
+    return 1;
+}
+
+/* Backward step: with the backward limit temporarily set to I[0], take the
+   character before the cursor if it is outside g_v and copy it into S[0].
+   If the text before that character ends with the same string, delete the
+   slice (the character copied).
+   Returns 1 on deletion, 0 if the conditions are not met, -1 if copying
+   into S[0] fails, or a negative value propagated from slice_del. */
+static int r_undouble(struct SN_env * z) {
+    {   int mlimit; /* setlimit, line 76 */
+        int m1 = z->l - z->c; (void)m1;
+        if (z->c < z->I[0]) return 0;
+        z->c = z->I[0]; /* tomark, line 76 */
+        mlimit = z->lb; z->lb = z->c;
+        z->c = z->l - m1;
+        z->ket = z->c; /* [, line 76 */
+        if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; }
+        z->bra = z->c; /* ], line 76 */
+        z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
+        if (z->S[0] == 0) return -1; /* -> ch, line 76 */
+        z->lb = mlimit;
+    }
+    if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
+    {   int ret = slice_del(z); /* delete, line 78 */
+        if (ret < 0) return ret;
+    }
+    return 1;
+}
+
+/* Stem the string held in z in place.
+   Runs r_mark_regions forwards, then switches to backward processing
+   (lb = cursor, cursor = end) and runs r_main_suffix, r_consonant_pair,
+   r_other_suffix and r_undouble in turn.  A step that returns 0 is simply
+   skipped and the cursor is restored after every step.
+   Returns 1 on completion, or the negative value of the first step that
+   reports an error. */
+extern int danish_UTF_8_stem(struct SN_env * z) {
+    {   int c1 = z->c; /* do, line 84 */
+        {   int ret = r_mark_regions(z);
+            if (ret == 0) goto lab0; /* call mark_regions, line 84 */
+            if (ret < 0) return ret;
+        }
+    lab0:
+        z->c = c1;
+    }
+    z->lb = z->c; z->c = z->l; /* backwards, line 85 */
+
+    {   int m2 = z->l - z->c; (void)m2; /* do, line 86 */
+        {   int ret = r_main_suffix(z);
+            if (ret == 0) goto lab1; /* call main_suffix, line 86 */
+            if (ret < 0) return ret;
+        }
+    lab1:
+        z->c = z->l - m2;
+    }
+    {   int m3 = z->l - z->c; (void)m3; /* do, line 87 */
+        {   int ret = r_consonant_pair(z);
+            if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
+            if (ret < 0) return ret;
+        }
+    lab2:
+        z->c = z->l - m3;
+    }
+    {   int m4 = z->l - z->c; (void)m4; /* do, line 88 */
+        {   int ret = r_other_suffix(z);
+            if (ret == 0) goto lab3; /* call other_suffix, line 88 */
+            if (ret < 0) return ret;
+        }
+    lab3:
+        z->c = z->l - m4;
+    }
+    {   int m5 = z->l - z->c; (void)m5; /* do, line 89 */
+        {   int ret = r_undouble(z);
+            if (ret == 0) goto lab4; /* call undouble, line 89 */
+            if (ret < 0) return ret;
+        }
+    lab4:
+        z->c = z->l - m5;
+    }
+    z->c = z->lb;
+    return 1;
+}
+
+/* Create an environment sized for this stemmer: one string slot, two int
+   slots and no byte slots. */
+extern struct SN_env * danish_UTF_8_create_env(void)
+{
+    return SN_create_env(1, 2, 0);
+}
+
+/* Release an environment made by danish_UTF_8_create_env (one string slot). */
+extern void danish_UTF_8_close_env(struct SN_env * z)
+{
+    SN_close_env(z, 1);
+}
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_danish.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,16 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+/* Public interface of the Danish UTF-8 stemmer, with C linkage. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Allocate / free the environment used by danish_UTF_8_stem. */
+extern struct SN_env * danish_UTF_8_create_env(void);
+extern void danish_UTF_8_close_env(struct SN_env * z);
+
+/* Stem the current string of z in place. */
+extern int danish_UTF_8_stem(struct SN_env * z);
+
+#ifdef __cplusplus
+}
+#endif
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.c Wed Nov 10 16:02:40 2010
@@ -0,0 +1,634 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#include "../runtime/header.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int dutch_UTF_8_stem(struct SN_env * z);
+#ifdef __cplusplus
+}
+#endif
+static int r_standard_suffix(struct SN_env * z); /* the backward suffix-removal steps; called from dutch_UTF_8_stem */
+static int r_undouble(struct SN_env * z); /* requires "dd", "kk" or "tt" before the cursor, then calls slice_del on the last character */
+static int r_R2(struct SN_env * z); /* 1 if z->I[1] <= z->c, otherwise 0 */
+static int r_R1(struct SN_env * z); /* 1 if z->I[0] <= z->c, otherwise 0 */
+static int r_mark_regions(struct SN_env * z); /* sets the marks z->I[0] (p1) and z->I[1] (p2) */
+static int r_en_ending(struct SN_env * z); /* guarded slice_del followed by r_undouble */
+static int r_e_ending(struct SN_env * z); /* guarded removal of a final 'e'; maintains z->B[0] (e_found) */
+static int r_postlude(struct SN_env * z); /* rewrites 'Y' to 'y' and 'I' to 'i' */
+static int r_prelude(struct SN_env * z); /* rewrites accented vowels to plain ones and some 'y'/'i' to 'Y'/'I' */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+extern struct SN_env * dutch_UTF_8_create_env(void);
+extern void dutch_UTF_8_close_env(struct SN_env * z);
+
+
+#ifdef __cplusplus
+}
+#endif
+static const symbol s_0_1[2] = { 0xC3, 0xA1 }; /* UTF-8 bytes of U+00E1, a with acute */
+static const symbol s_0_2[2] = { 0xC3, 0xA4 }; /* UTF-8 bytes of U+00E4, a with diaeresis */
+static const symbol s_0_3[2] = { 0xC3, 0xA9 }; /* UTF-8 bytes of U+00E9, e with acute */
+static const symbol s_0_4[2] = { 0xC3, 0xAB }; /* UTF-8 bytes of U+00EB, e with diaeresis */
+static const symbol s_0_5[2] = { 0xC3, 0xAD }; /* UTF-8 bytes of U+00ED, i with acute */
+static const symbol s_0_6[2] = { 0xC3, 0xAF }; /* UTF-8 bytes of U+00EF, i with diaeresis */
+static const symbol s_0_7[2] = { 0xC3, 0xB3 }; /* UTF-8 bytes of U+00F3, o with acute */
+static const symbol s_0_8[2] = { 0xC3, 0xB6 }; /* UTF-8 bytes of U+00F6, o with diaeresis */
+static const symbol s_0_9[2] = { 0xC3, 0xBA }; /* UTF-8 bytes of U+00FA, u with acute */
+static const symbol s_0_10[2] = { 0xC3, 0xBC }; /* UTF-8 bytes of U+00FC, u with diaeresis */
+
+static const struct among a_0[11] = /* table searched by r_prelude; the 4th field is presumably the code find_among returns, which r_prelude switches on (struct among is declared in the runtime header, not shown) */
+{
+/*  0 */ { 0, 0, -1, 6, 0}, /* empty string, code 6: r_prelude skips one character */
+/*  1 */ { 2, s_0_1, 0, 1, 0}, /* code 1: r_prelude writes 'a' */
+/*  2 */ { 2, s_0_2, 0, 1, 0}, /* code 1: r_prelude writes 'a' */
+/*  3 */ { 2, s_0_3, 0, 2, 0}, /* code 2: r_prelude writes 'e' */
+/*  4 */ { 2, s_0_4, 0, 2, 0}, /* code 2: r_prelude writes 'e' */
+/*  5 */ { 2, s_0_5, 0, 3, 0}, /* code 3: r_prelude writes 'i' */
+/*  6 */ { 2, s_0_6, 0, 3, 0}, /* code 3: r_prelude writes 'i' */
+/*  7 */ { 2, s_0_7, 0, 4, 0}, /* code 4: r_prelude writes 'o' */
+/*  8 */ { 2, s_0_8, 0, 4, 0}, /* code 4: r_prelude writes 'o' */
+/*  9 */ { 2, s_0_9, 0, 5, 0}, /* code 5: r_prelude writes 'u' */
+/* 10 */ { 2, s_0_10, 0, 5, 0} /* code 5: r_prelude writes 'u' */
+};
+
+static const symbol s_1_1[1] = { 'I' };
+static const symbol s_1_2[1] = { 'Y' };
+
+static const struct among a_1[3] = /* table searched by r_postlude; the 4th field is presumably the code find_among returns */
+{
+/*  0 */ { 0, 0, -1, 3, 0}, /* empty string, code 3: r_postlude skips one character */
+/*  1 */ { 1, s_1_1, 0, 2, 0}, /* 'I', code 2: r_postlude writes 'i' */
+/*  2 */ { 1, s_1_2, 0, 1, 0} /* 'Y', code 1: r_postlude writes 'y' */
+};
+
+static const symbol s_2_0[2] = { 'd', 'd' };
+static const symbol s_2_1[2] = { 'k', 'k' };
+static const symbol s_2_2[2] = { 't', 't' };
+
+static const struct among a_2[3] = /* doubled consonants tested by r_undouble, which only checks whether find_among_b's result is zero or not */
+{
+/*  0 */ { 2, s_2_0, -1, -1, 0},
+/*  1 */ { 2, s_2_1, -1, -1, 0},
+/*  2 */ { 2, s_2_2, -1, -1, 0}
+};
+
+static const symbol s_3_0[3] = { 'e', 'n', 'e' };
+static const symbol s_3_1[2] = { 's', 'e' };
+static const symbol s_3_2[2] = { 'e', 'n' };
+static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' };
+static const symbol s_3_4[1] = { 's' };
+
+static const struct among a_3[5] = /* suffixes for the first step of r_standard_suffix; the 4th field is presumably the code find_among_b returns */
+{
+/*  0 */ { 3, s_3_0, -1, 2, 0}, /* "ene", code 2 */
+/*  1 */ { 2, s_3_1, -1, 3, 0}, /* "se", code 3 */
+/*  2 */ { 2, s_3_2, -1, 2, 0}, /* "en", code 2 */
+/*  3 */ { 5, s_3_3, 2, 1, 0}, /* "heden", code 1; the 3rd field (2) is the index of "en", which "heden" ends with */
+/*  4 */ { 1, s_3_4, -1, 3, 0} /* "s", code 3 */
+};
+
+static const symbol s_4_0[3] = { 'e', 'n', 'd' };
+static const symbol s_4_1[2] = { 'i', 'g' };
+static const symbol s_4_2[3] = { 'i', 'n', 'g' };
+static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' };
+static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' };
+static const symbol s_4_5[3] = { 'b', 'a', 'r' };
+
+static const struct among a_4[6] = /* suffixes for the "do, line 126" step of r_standard_suffix; the 4th field is presumably the code find_among_b returns */
+{
+/*  0 */ { 3, s_4_0, -1, 1, 0}, /* "end", code 1 */
+/*  1 */ { 2, s_4_1, -1, 2, 0}, /* "ig", code 2 */
+/*  2 */ { 3, s_4_2, -1, 1, 0}, /* "ing", code 1 */
+/*  3 */ { 4, s_4_3, -1, 3, 0}, /* "lijk", code 3 */
+/*  4 */ { 4, s_4_4, -1, 4, 0}, /* "baar", code 4 */
+/*  5 */ { 3, s_4_5, -1, 5, 0} /* "bar", code 5 */
+};
+
+static const symbol s_5_0[2] = { 'a', 'a' };
+static const symbol s_5_1[2] = { 'e', 'e' };
+static const symbol s_5_2[2] = { 'o', 'o' };
+static const symbol s_5_3[2] = { 'u', 'u' };
+
+static const struct among a_5[4] = /* doubled vowels tested in the last step of r_standard_suffix, which only checks whether find_among_b's result is zero or not */
+{
+/*  0 */ { 2, s_5_0, -1, -1, 0},
+/*  1 */ { 2, s_5_1, -1, -1, 0},
+/*  2 */ { 2, s_5_2, -1, -1, 0},
+/*  3 */ { 2, s_5_3, -1, -1, 0}
+};
+
+static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; /* always passed with bounds 97, 232; if the runtime reads this as a bitmap whose bit 0 is code point 97 (TODO confirm), the members are a e i o u y and U+00E8 */
+
+static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; /* passed with bounds 73, 232; under the same bitmap assumption this is g_v plus 'I' (73) */
+
+static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; /* passed with bounds 97, 232; under the same bitmap assumption this is g_v plus 'j' */
+
+static const symbol s_0[] = { 'a' }; /* r_prelude: replacement for among code 1 */
+static const symbol s_1[] = { 'e' }; /* r_prelude: replacement for among code 2 */
+static const symbol s_2[] = { 'i' }; /* r_prelude: replacement for among code 3 */
+static const symbol s_3[] = { 'o' }; /* r_prelude: replacement for among code 4 */
+static const symbol s_4[] = { 'u' }; /* r_prelude: replacement for among code 5 */
+static const symbol s_5[] = { 'y' }; /* r_prelude: matched at the starting cursor position */
+static const symbol s_6[] = { 'Y' }; /* r_prelude: replacement for that 'y' */
+static const symbol s_7[] = { 'i' }; /* r_prelude: matched inside the repeat loop */
+static const symbol s_8[] = { 'I' }; /* r_prelude: replacement for that 'i' */
+static const symbol s_9[] = { 'y' }; /* r_prelude: matched inside the repeat loop */
+static const symbol s_10[] = { 'Y' }; /* r_prelude: replacement for that 'y' */
+static const symbol s_11[] = { 'y' }; /* r_postlude: replacement for among code 1 */
+static const symbol s_12[] = { 'i' }; /* r_postlude: replacement for among code 2 */
+static const symbol s_13[] = { 'e' }; /* r_e_ending: the ending it looks for */
+static const symbol s_14[] = { 'g', 'e', 'm' }; /* r_en_ending: fails when this precedes the cursor */
+static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; /* r_standard_suffix: replacement for among code 1 of a_3 */
+static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; /* r_standard_suffix: ending matched in the "do, line 122" step */
+static const symbol s_17[] = { 'c' }; /* r_standard_suffix: must not precede that "heid" */
+static const symbol s_18[] = { 'e', 'n' }; /* r_standard_suffix: ending matched after "heid" is deleted */
+static const symbol s_19[] = { 'i', 'g' }; /* r_standard_suffix: ending matched after "end"/"ing" is deleted */
+static const symbol s_20[] = { 'e' }; /* r_standard_suffix: must not precede that "ig" */
+static const symbol s_21[] = { 'e' }; /* r_standard_suffix: must not precede "ig" in among code 2 of a_4 */
+
+static int r_prelude(struct SN_env * z) { /* Forward pre-pass: (1) rewrites the accented vowels of a_0 to plain a/e/i/o/u, (2) rewrites a 'y' at the starting cursor to 'Y', (3) rewrites 'i' between two g_v characters to 'I' and 'y' after a g_v character to 'Y'. Returns 1, or the negative result of a failed slice_from_s. */
+    int among_var;
+    {   int c_test = z->c; /* test, line 42: the cursor is put back to c_test once the loop below ends */
+        while(1) { /* repeat, line 42 */
+            int c1 = z->c; /* fallback position if this iteration fails */
+            z->bra = z->c; /* [, line 43 */
+            if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else /* shortcut: unless the NEXT byte is 0xA0..0xBF and one of the second bytes used in a_0 (the bitmask), take code 6 without searching */
+            among_var = find_among(z, a_0, 11); /* substring, line 43 */
+            if (!(among_var)) goto lab0;
+            z->ket = z->c; /* ], line 43 */
+            switch(among_var) {
+                case 0: goto lab0;
+                case 1:
+                    {   int ret = slice_from_s(z, 1, s_0); /* <-, line 45: write 'a' */
+                        if (ret < 0) return ret;
+                    }
+                    break;
+                case 2:
+                    {   int ret = slice_from_s(z, 1, s_1); /* <-, line 47: write 'e' */
+                        if (ret < 0) return ret;
+                    }
+                    break;
+                case 3:
+                    {   int ret = slice_from_s(z, 1, s_2); /* <-, line 49: write 'i' */
+                        if (ret < 0) return ret;
+                    }
+                    break;
+                case 4:
+                    {   int ret = slice_from_s(z, 1, s_3); /* <-, line 51: write 'o' */
+                        if (ret < 0) return ret;
+                    }
+                    break;
+                case 5:
+                    {   int ret = slice_from_s(z, 1, s_4); /* <-, line 53: write 'u' */
+                        if (ret < 0) return ret;
+                    }
+                    break;
+                case 6:
+                    {   int ret = skip_utf8(z->p, z->c, 0, z->l, 1); /* no accented vowel here: step over one character */
+                        if (ret < 0) goto lab0; /* a negative result ends the repeat loop (presumably end of input) */
+                        z->c = ret; /* next, line 54 */
+                    }
+                    break;
+            }
+            continue;
+        lab0:
+            z->c = c1;
+            break;
+        }
+        z->c = c_test;
+    }
+    {   int c_keep = z->c; /* try, line 57 */
+        z->bra = z->c; /* [, line 57 */
+        if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } /* no 'y' at the cursor: nothing to do */
+        z->ket = z->c; /* ], line 57 */
+        {   int ret = slice_from_s(z, 1, s_6); /* <-, line 57: write 'Y' */
+            if (ret < 0) return ret;
+        }
+    lab1:
+        ;
+    }
+    while(1) { /* repeat, line 58 */
+        int c2 = z->c; /* fallback position for the outer repeat */
+        while(1) { /* goto, line 58: try each position in turn until the pattern below matches */
+            int c3 = z->c; /* position being tried */
+            if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3; /* a non-zero result means the character here is not in g_v */
+            z->bra = z->c; /* [, line 59 */
+            {   int c4 = z->c; /* or, line 59 */
+                if (!(eq_s(z, 1, s_7))) goto lab5; /* first alternative: 'i' ... */
+                z->ket = z->c; /* ], line 59 */
+                if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5; /* ... that is followed by a g_v character */
+                {   int ret = slice_from_s(z, 1, s_8); /* <-, line 59: write 'I' */
+                    if (ret < 0) return ret;
+                }
+                goto lab4;
+            lab5:
+                z->c = c4;
+                if (!(eq_s(z, 1, s_9))) goto lab3; /* second alternative: 'y' */
+                z->ket = z->c; /* ], line 60 */
+                {   int ret = slice_from_s(z, 1, s_10); /* <-, line 60: write 'Y' */
+                    if (ret < 0) return ret;
+                }
+            }
+        lab4:
+            z->c = c3; /* matched: the cursor goes back to where the match started */
+            break;
+        lab3:
+            z->c = c3;
+            {   int ret = skip_utf8(z->p, z->c, 0, z->l, 1); /* no match here: move one character forward and retry */
+                if (ret < 0) goto lab2; /* cannot advance any further: leave both loops */
+                z->c = ret; /* goto, line 58 */
+            }
+        }
+        continue;
+    lab2:
+        z->c = c2;
+        break;
+    }
+    return 1;
+}
+
+static int r_mark_regions(struct SN_env * z) {
+    /* Compute the two marks later read by r_R1 and r_R2: z->I[0] (p1) and
+     * z->I[1] (p2).  Both start out as the limit z->l.  Each mark is the
+     * cursor position reached after a "gopast grouping v" scan followed by a
+     * "gopast non v" scan; the p2 scans continue from where the p1 scans
+     * stopped.  p1 is raised to 3 when it would otherwise be smaller.
+     * Returns 0 as soon as one of the scans reports a negative value (marks
+     * not reached yet keep the value they had), and 1 after both marks have
+     * been stored. */
+    int which;     /* 0 while computing p1, 1 while computing p2 */
+    int advance;   /* value reported by the grouping scanners */
+
+    z->I[0] = z->l;
+    z->I[1] = z->l;
+    for (which = 0; which < 2; which++) {
+        /* gopast grouping v (Snowball source lines 69 and 71) */
+        advance = out_grouping_U(z, g_v, 97, 232, 1);
+        if (advance < 0) return 0;
+        z->c += advance;
+        /* gopast non v (Snowball source lines 69 and 71) */
+        advance = in_grouping_U(z, g_v, 97, 232, 1);
+        if (advance < 0) return 0;
+        z->c += advance;
+        z->I[which] = z->c;   /* setmark p1 (line 69) or p2 (line 71) */
+        /* try, line 70: the minimum of 3 applies to p1 only */
+        if (which == 0 && z->I[0] < 3) {
+            z->I[0] = 3;
+        }
+    }
+    return 1;
+}
+
+static int r_postlude(struct SN_env * z) { /* Forward pass that rewrites each 'Y' to 'y' and each 'I' to 'i', stepping over every other character. Returns 1, or the negative result of a failed slice_from_s. */
+    int among_var;
+    while(1) { /* repeat, line 75 */
+        int c1 = z->c; /* fallback position if this iteration fails */
+        z->bra = z->c; /* [, line 77 */
+        if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else /* 73 is 'I', 89 is 'Y': at the limit or on any other byte, take code 3 without searching */
+        among_var = find_among(z, a_1, 3); /* substring, line 77 */
+        if (!(among_var)) goto lab0;
+        z->ket = z->c; /* ], line 77 */
+        switch(among_var) {
+            case 0: goto lab0;
+            case 1:
+                {   int ret = slice_from_s(z, 1, s_11); /* <-, line 78: write 'y' */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 2:
+                {   int ret = slice_from_s(z, 1, s_12); /* <-, line 79: write 'i' */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 3:
+                {   int ret = skip_utf8(z->p, z->c, 0, z->l, 1); /* step over one character */
+                    if (ret < 0) goto lab0; /* a negative result ends the repeat loop (presumably end of input) */
+                    z->c = ret; /* next, line 80 */
+                }
+                break;
+        }
+        continue;
+    lab0:
+        z->c = c1;
+        break;
+    }
+    return 1;
+}
+
+static int r_R1(struct SN_env * z) {
+    /* Yields 1 when the cursor is at or beyond the p1 mark kept in z->I[0], and 0 otherwise. */
+    return (z->c >= z->I[0]) ? 1 : 0;
+}
+
+static int r_R2(struct SN_env * z) {
+    /* Yields 1 when the cursor is at or beyond the p2 mark kept in z->I[1], and 0 otherwise. */
+    return (z->c >= z->I[1]) ? 1 : 0;
+}
+
+static int r_undouble(struct SN_env * z) {
+    /* If "dd", "kk" or "tt" (table a_2) precedes the cursor, call slice_del on the last character; 1 on success, 0 on no match, negative on slice_del failure. */
+    const int from_limit = z->l - z->c;   /* test, line 91: cursor distance from z->l, restored after the lookbehind */
+    int prev, status;
+    /* Cheap pre-filter: more than one byte must precede the cursor and the last one must be 'd', 'k' or 't' (the bitmask). */
+    if (z->c - 1 <= z->lb) return 0;
+    if ((z->p[z->c - 1] >> 5) != 3) return 0;
+    if (((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1) == 0) return 0;
+    if (find_among_b(z, a_2, 3) == 0) return 0;   /* among, line 91 */
+    z->c = z->l - from_limit;
+    z->ket = z->c;                                /* [, line 91 */
+    prev = skip_utf8(z->p, z->c, z->lb, 0, -1);   /* next, line 91: one character backwards */
+    if (prev < 0) return 0;
+    z->bra = z->c = prev;                         /* ], line 91 */
+    status = slice_del(z);                        /* delete, line 91 */
+    return (status < 0) ? status : 1;
+}
+
+static int r_e_ending(struct SN_env * z) {
+    /* Remove an 'e' before the cursor when it lies in R1 and the g_v
+     * out-grouping check on the preceding character succeeds; record the
+     * removal in z->B[0] (e_found) and then run r_undouble.  Returns 1 on
+     * success, 0 when a check or r_undouble fails, else a negative helper result. */
+    int status;       /* outcome of the latest helper call */
+    int from_limit;   /* cursor distance from z->l, used to undo the lookbehind */
+    z->B[0] = 0;                                /* unset e_found, line 95 */
+    z->ket = z->c;                              /* [, line 96 */
+    if (eq_s_b(z, 1, s_13) == 0) return 0;
+    z->bra = z->c;                              /* ], line 96 */
+    status = r_R1(z);                           /* call R1, line 96 */
+    if (status <= 0) return status;
+    from_limit = z->l - z->c;                   /* test, line 96 */
+    if (out_grouping_b_U(z, g_v, 97, 232, 0) != 0) return 0;
+    z->c = z->l - from_limit;
+    status = slice_del(z);                      /* delete, line 96 */
+    if (status < 0) return status;
+    z->B[0] = 1;                                /* set e_found, line 97 */
+    status = r_undouble(z);                     /* call undouble, line 98 */
+    if (status <= 0) return status;
+    return 1;
+}
+
+static int r_en_ending(struct SN_env * z) {
+    /* Call slice_del on the slice the caller has already delimited with
+     * z->ket / z->bra, provided that the cursor is in R1, the g_v out-grouping
+     * check on the preceding character succeeds, and the text before the
+     * cursor does not end in "gem"; afterwards run r_undouble.
+     * Returns 1 on success, 0 when a check or r_undouble fails, or the
+     * negative value reported by a helper. */
+    int status;       /* outcome of the latest helper call */
+    int from_limit;   /* cursor distance from z->l; both lookbehinds start here */
+    status = r_R1(z);                           /* call R1, line 102 */
+    if (status <= 0) return status;
+    from_limit = z->l - z->c;                   /* and, line 102 */
+    if (out_grouping_b_U(z, g_v, 97, 232, 0) != 0) return 0;
+    z->c = z->l - from_limit;
+    /* not, line 102: a match of "gem" is a failure */
+    if (eq_s_b(z, 3, s_14) != 0) return 0;
+    z->c = z->l - from_limit;
+    status = slice_del(z);                      /* delete, line 102 */
+    if (status < 0) return status;
+    /* the deletion has already happened even if r_undouble now reports 0 */
+    status = r_undouble(z);                     /* call undouble, line 103 */
+    if (status <= 0) return status;
+    return 1;
+}
+
+static int r_standard_suffix(struct SN_env * z) { /* Backward suffix removal in five independent "do" steps; a step that fails is abandoned and the cursor is put back before the next one. Returns 1, or the negative result of a failed helper. */
+    int among_var;
+    {   int m1 = z->l - z->c; (void)m1; /* do, line 107: endings from a_3 */
+        z->ket = z->c; /* [, line 108 */
+        if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; /* pre-filter: the last byte must be 'e', 'n' or 's', the final letters of the a_3 entries */
+        among_var = find_among_b(z, a_3, 5); /* substring, line 108 */
+        if (!(among_var)) goto lab0;
+        z->bra = z->c; /* ], line 108 */
+        switch(among_var) {
+            case 0: goto lab0;
+            case 1: /* "heden" */
+                {   int ret = r_R1(z);
+                    if (ret == 0) goto lab0; /* call R1, line 110 */
+                    if (ret < 0) return ret;
+                }
+                {   int ret = slice_from_s(z, 4, s_15); /* <-, line 110: write "heid" */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 2: /* "ene" or "en" */
+                {   int ret = r_en_ending(z);
+                    if (ret == 0) goto lab0; /* call en_ending, line 113 */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 3: /* "se" or "s" */
+                {   int ret = r_R1(z);
+                    if (ret == 0) goto lab0; /* call R1, line 116 */
+                    if (ret < 0) return ret;
+                }
+                if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0; /* give up unless the g_v_j out-grouping check on the preceding character succeeds */
+                {   int ret = slice_del(z); /* delete, line 116 */
+                    if (ret < 0) return ret;
+                }
+                break;
+        }
+    lab0:
+        z->c = z->l - m1;
+    }
+    {   int m2 = z->l - z->c; (void)m2; /* do, line 120: final 'e' */
+        {   int ret = r_e_ending(z);
+            if (ret == 0) goto lab1; /* call e_ending, line 120 */
+            if (ret < 0) return ret;
+        }
+    lab1:
+        z->c = z->l - m2;
+    }
+    {   int m3 = z->l - z->c; (void)m3; /* do, line 122: "heid", optionally followed by an "en" ending */
+        z->ket = z->c; /* [, line 122 */
+        if (!(eq_s_b(z, 4, s_16))) goto lab2;
+        z->bra = z->c; /* ], line 122 */
+        {   int ret = r_R2(z);
+            if (ret == 0) goto lab2; /* call R2, line 122 */
+            if (ret < 0) return ret;
+        }
+        {   int m4 = z->l - z->c; (void)m4; /* not, line 122: "heid" must not be preceded by 'c' */
+            if (!(eq_s_b(z, 1, s_17))) goto lab3;
+            goto lab2;
+        lab3:
+            z->c = z->l - m4;
+        }
+        {   int ret = slice_del(z); /* delete, line 122 */
+            if (ret < 0) return ret;
+        }
+        z->ket = z->c; /* [, line 123 */
+        if (!(eq_s_b(z, 2, s_18))) goto lab2; /* continue only if "en" now precedes the cursor */
+        z->bra = z->c; /* ], line 123 */
+        {   int ret = r_en_ending(z);
+            if (ret == 0) goto lab2; /* call en_ending, line 123 */
+            if (ret < 0) return ret;
+        }
+    lab2:
+        z->c = z->l - m3;
+    }
+    {   int m5 = z->l - z->c; (void)m5; /* do, line 126: endings from a_4 */
+        z->ket = z->c; /* [, line 127 */
+        if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; /* pre-filter: more than one byte available and the last one is 'd', 'g', 'k' or 'r', the final letters of the a_4 entries */
+        among_var = find_among_b(z, a_4, 6); /* substring, line 127 */
+        if (!(among_var)) goto lab4;
+        z->bra = z->c; /* ], line 127 */
+        switch(among_var) {
+            case 0: goto lab4;
+            case 1: /* "end" or "ing" */
+                {   int ret = r_R2(z);
+                    if (ret == 0) goto lab4; /* call R2, line 129 */
+                    if (ret < 0) return ret;
+                }
+                {   int ret = slice_del(z); /* delete, line 129 */
+                    if (ret < 0) return ret;
+                }
+                {   int m6 = z->l - z->c; (void)m6; /* or, line 130: first try to remove a following "ig", otherwise undouble */
+                    z->ket = z->c; /* [, line 130 */
+                    if (!(eq_s_b(z, 2, s_19))) goto lab6;
+                    z->bra = z->c; /* ], line 130 */
+                    {   int ret = r_R2(z);
+                        if (ret == 0) goto lab6; /* call R2, line 130 */
+                        if (ret < 0) return ret;
+                    }
+                    {   int m7 = z->l - z->c; (void)m7; /* not, line 130: "ig" must not be preceded by 'e' */
+                        if (!(eq_s_b(z, 1, s_20))) goto lab7;
+                        goto lab6;
+                    lab7:
+                        z->c = z->l - m7;
+                    }
+                    {   int ret = slice_del(z); /* delete, line 130 */
+                        if (ret < 0) return ret;
+                    }
+                    goto lab5;
+                lab6:
+                    z->c = z->l - m6;
+                    {   int ret = r_undouble(z);
+                        if (ret == 0) goto lab4; /* call undouble, line 130 */
+                        if (ret < 0) return ret;
+                    }
+                }
+            lab5:
+                break;
+            case 2: /* "ig" */
+                {   int ret = r_R2(z);
+                    if (ret == 0) goto lab4; /* call R2, line 133 */
+                    if (ret < 0) return ret;
+                }
+                {   int m8 = z->l - z->c; (void)m8; /* not, line 133: "ig" must not be preceded by 'e' */
+                    if (!(eq_s_b(z, 1, s_21))) goto lab8;
+                    goto lab4;
+                lab8:
+                    z->c = z->l - m8;
+                }
+                {   int ret = slice_del(z); /* delete, line 133 */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 3: /* "lijk" */
+                {   int ret = r_R2(z);
+                    if (ret == 0) goto lab4; /* call R2, line 136 */
+                    if (ret < 0) return ret;
+                }
+                {   int ret = slice_del(z); /* delete, line 136 */
+                    if (ret < 0) return ret;
+                }
+                {   int ret = r_e_ending(z);
+                    if (ret == 0) goto lab4; /* call e_ending, line 136 */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 4: /* "baar" */
+                {   int ret = r_R2(z);
+                    if (ret == 0) goto lab4; /* call R2, line 139 */
+                    if (ret < 0) return ret;
+                }
+                {   int ret = slice_del(z); /* delete, line 139 */
+                    if (ret < 0) return ret;
+                }
+                break;
+            case 5: /* "bar" */
+                {   int ret = r_R2(z);
+                    if (ret == 0) goto lab4; /* call R2, line 142 */
+                    if (ret < 0) return ret;
+                }
+                if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142: only when r_e_ending last left z->B[0] set */
+                {   int ret = slice_del(z); /* delete, line 142 */
+                    if (ret < 0) return ret;
+                }
+                break;
+        }
+    lab4:
+        z->c = z->l - m5;
+    }
+    {   int m9 = z->l - z->c; (void)m9; /* do, line 146: shorten a doubled vowel before the final character */
+        if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9; /* g_v_I out-grouping check on the last character (presumably steps back over it on success) */
+        {   int m_test = z->l - z->c; /* test, line 148: lookbehind only, the cursor is restored from m_test */
+            if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; /* pre-filter: more than one byte available and the last one is 'a', 'e', 'o' or 'u' */
+            if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */
+            if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9; /* g_v out-grouping check on the character before the doubled vowel */
+            z->c = z->l - m_test;
+        }
+        z->ket = z->c; /* [, line 152 */
+        {   int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); /* one character backwards */
+            if (ret < 0) goto lab9;
+            z->c = ret; /* next, line 152 */
+        }
+        z->bra = z->c; /* ], line 152 */
+        {   int ret = slice_del(z); /* delete, line 152 */
+            if (ret < 0) return ret;
+        }
+    lab9:
+        z->c = z->l - m9;
+    }
+    return 1;
+}
+
+extern int dutch_UTF_8_stem(struct SN_env * z) {
+    /* Entry point of the Dutch (UTF-8) stemmer.  Runs, in order: r_prelude and
+     * r_mark_regions forwards, r_standard_suffix backwards, and r_postlude
+     * forwards.  Each stage is a Snowball "do": a stage result of 0 or 1 is
+     * ignored and the cursor is put back afterwards; only a negative result
+     * aborts and is returned to the caller.  Otherwise the function returns 1. */
+    int status;       /* result of the stage that just ran */
+    int origin;       /* forward cursor saved before a stage */
+    int from_limit;   /* backward cursor saved as a distance from z->l */
+
+    /* do, line 159: prelude */
+    origin = z->c;
+    status = r_prelude(z);
+    if (status < 0) return status;
+    z->c = origin;
+
+    /* do, line 160: mark_regions */
+    origin = z->c;
+    status = r_mark_regions(z);
+    if (status < 0) return status;
+    z->c = origin;
+
+    /* backwards, line 161: the current cursor becomes the backward limit */
+    z->lb = z->c;
+    z->c = z->l;
+    /* do, line 162: standard_suffix */
+    from_limit = z->l - z->c;
+    status = r_standard_suffix(z);
+    if (status < 0) return status;
+    z->c = z->l - from_limit;
+    z->c = z->lb;   /* end of the backward section: cursor goes to z->lb */
+    /* do, line 163: postlude */
+    origin = z->c;
+    status = r_postlude(z);
+    if (status < 0) return status;
+    z->c = origin;
+    return 1;
+}
+
+extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } /* Returns the result of SN_create_env(0, 2, 1); this stemmer uses z->I[0], z->I[1] and z->B[0], which presumably is what the 2 and the 1 reserve (TODO confirm against the runtime). */
+
+extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } /* Passes z to SN_close_env; the 0 equals the first argument dutch_UTF_8_create_env gives to SN_create_env. */
+

Added: incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h?rev=1033549&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h (added)
+++ incubator/lucy/trunk/modules/analysis/snowstem/source/src_c/stem_UTF_8_dutch.h Wed Nov 10 16:02:40 2010
@@ -0,0 +1,16 @@
+
+/* This file was generated automatically by the Snowball to ANSI C compiler */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern struct SN_env * dutch_UTF_8_create_env(void); /* returns the environment produced by SN_create_env(0, 2, 1) */
+extern void dutch_UTF_8_close_env(struct SN_env * z); /* passes z to SN_close_env(z, 0) */
+
+extern int dutch_UTF_8_stem(struct SN_env * z); /* runs the Dutch stemmer stages on z; returns 1, or a negative value propagated from a failed stage */
+
+#ifdef __cplusplus
+}
+#endif
+