You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/12/06 15:12:10 UTC

svn commit: r483083 - in /spamassassin/branches/jm_re2c_hacks: lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm rules/v320.pre sa-compile.raw t/re_base_extraction.t

Author: jm
Date: Wed Dec  6 06:12:08 2006
New Revision: 483083

URL: http://svn.apache.org/viewvc?view=rev&rev=483083
Log:
Fix a test failure; the test itself was incorrect. Add a couple more test cases to elucidate the expected behaviour. Also fall back to the pre-perl-5.9.5 pattern extraction algorithm if regmust() is not available.

Modified:
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
    spamassassin/branches/jm_re2c_hacks/rules/v320.pre
    spamassassin/branches/jm_re2c_hacks/sa-compile.raw
    spamassassin/branches/jm_re2c_hacks/t/re_base_extraction.t

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?view=diff&rev=483083&r1=483082&r2=483083
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Wed Dec  6 06:12:08 2006
@@ -119,11 +119,13 @@
     next if ($conf->{rules_to_replace}->{$name});
 
     my ($qr, $mods) = $self->simplify_and_qr_regexp($rule);
-    my ($anchored, $floating) = regmust(qr/$qr/);
-    my @bases1 = (quotemeta $anchored);
-    my @bases2 = (quotemeta $floating);
-    # my @bases1 = ();
-    # my @bases2 = ();
+    my ($anchored, $floating, @bases1, @bases2);
+
+    eval {
+      ($anchored, $floating) = regmust(qr/$qr/);
+      @bases1 = (quotemeta $anchored);
+      @bases2 = (quotemeta $floating);
+    };
 
     my $len1 = 0;
     my $len2 = 0;

Modified: spamassassin/branches/jm_re2c_hacks/rules/v320.pre
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/v320.pre?view=diff&rev=483083&r1=483082&r2=483083
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/v320.pre (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/v320.pre Wed Dec  6 06:12:08 2006
@@ -39,11 +39,11 @@
 loadplugin Mail::SpamAssassin::Plugin::WLBLEval
 
 # Rule2XSBody - speedup by compilation of ruleset to native code
-# loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody
+loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody
 
 # RabinKarpBody - EXPERIMENTAL speedup plugin, requires RabinKarpAccel module
 # loadplugin Mail::SpamAssassin::Plugin::RabinKarpBody
 
 # P595Body - EXPERIMENTAL speedup plugin, using bleadperl RE optimizations
-loadplugin Mail::SpamAssassin::Plugin::P595Body
+# loadplugin Mail::SpamAssassin::Plugin::P595Body
 

Modified: spamassassin/branches/jm_re2c_hacks/sa-compile.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/sa-compile.raw?view=diff&rev=483083&r1=483082&r2=483083
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/sa-compile.raw (original)
+++ spamassassin/branches/jm_re2c_hacks/sa-compile.raw Wed Dec  6 06:12:08 2006
@@ -666,8 +666,8 @@
     elsif ($tok eq '\\') {
       $re =~ /\G(.)/gc or die "\\ at end of string!";
       my $esc = $1;
-      if ($esc !~ /^
-                [\.\@\$\(\)\/\-\+\*\^\?\!_]
+      if ($esc =~ /^
+                [\"]
                 $/x)
       {
         die "Unsupported escape: \\$esc";

Modified: spamassassin/branches/jm_re2c_hacks/t/re_base_extraction.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/t/re_base_extraction.t?view=diff&rev=483083&r1=483082&r2=483083
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/t/re_base_extraction.t (original)
+++ spamassassin/branches/jm_re2c_hacks/t/re_base_extraction.t Wed Dec  6 06:12:08 2006
@@ -3,7 +3,7 @@
 # Test regular expression base-string extraction in
 # Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor
 
-use lib '.'; use lib 't';
+use lib '.'; use lib 't'; use lib '../lib';
 use SATest; sa_t_init("re_base_extraction");
 use Test;
 use strict;
@@ -13,7 +13,7 @@
   if (-e 't/test_dir') { chdir 't'; } 
   if (-e 'test_dir') { unshift(@INC, '../blib/lib'); }
 
-  plan tests => 25;
+  plan tests => 28;
 
 };
 use lib '../lib';
@@ -23,6 +23,8 @@
     body EXCUSE_REMOVE /to be removed from.{0,20}(?:mailings|offers)/i
     body KAM_STOCKTIP15 /(?:Nano Superlattice Technology|NSLT)/is
     body TEST1 /foo(?:ish)? bar/
+    body TEST1A /fo(?:oish|o) bar/
+    body TEST1B /fo(?:oish|o)? bar/
     body TEST2 /foody* bar/
     body TEST3 /foody? bar/
     body TEST4 /A(?i:ct) N(?i:ow)/
@@ -40,11 +42,12 @@
     bases_split_out_alternations => 1
 }, [
 
-    'foo bar:TEST1 FOO',
+    'fo bar:TEST1B',
+    'foo bar:TEST1 TEST1B TEST1A FOO',
     'to be removed from:EXCUSE_REMOVE',
     'nslt:KAM_STOCKTIP15',
     'nano superlattice technology:KAM_STOCKTIP15',
-    'fooish bar:TEST1',
+    'fooish bar:TEST1 TEST1B TEST1A',
     'act now:TEST4',
     'food:TEST2',
     'food bar:TEST3 TEST2',
@@ -67,7 +70,11 @@
     body FOO /foo bar/
     body EXCUSE_REMOVE /to be removed from.{0,20}(?:mailings|offers)/i
     body KAM_STOCKTIP15 /(?:Nano Superlattice Technology|NSLT)/is
-    body TEST1 /foo(?:ish)? bar/
+
+    # this should not result in a match on "foo bar" since we are not
+    # splitting alts in this test
+    body TEST1 /fo(?:oish|o)? bar/
+    body TEST2 /fo(?:oish|o) bar/
 
 ', {
     base_extract => 1,
@@ -83,8 +90,10 @@
 ],[
 
     'foo bar:FOO TEST1',
+    'foo bar:FOO TEST2',
     'nano superlattice technology:KAM_STOCKTIP15',
-    'fooish bar:TEST1'
+    'fooish bar:TEST1',
+    'fooish bar:TEST2'
 
 ]);
 ###########################################################################