You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/12/28 14:17:27 UTC

svn commit: r607239 - in /spamassassin/branches/3.2: lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm t/re_base_extraction.t

Author: jm
Date: Fri Dec 28 05:17:26 2007
New Revision: 607239

URL: http://svn.apache.org/viewvc?rev=607239&view=rev
Log:
bug 5696: cut regexp base strings at Unicode high codepoints, to avoid corruption of patterns containing UTF-8

Modified:
    spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
    spamassassin/branches/3.2/t/re_base_extraction.t

Modified: spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?rev=607239&r1=607238&r2=607239&view=diff
==============================================================================
--- spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Fri Dec 28 05:17:26 2007
@@ -119,7 +119,7 @@
 
   my $progress;
   $self->{show_progress} and $progress = Mail::SpamAssassin::Util::Progress->new({
-                total => scalar keys %{$rules},
+                total => (scalar keys %{$rules} || 1),
                 itemtype => 'rules',
               });
 
@@ -258,7 +258,7 @@
 
   $count = 0;
   $self->{show_progress} and $progress = Mail::SpamAssassin::Util::Progress->new({
-                total => scalar @good_bases,
+                total => (scalar @good_bases || 1),
                 itemtype => 'bases',
               });
 
@@ -480,7 +480,7 @@
       }
     }
   }
-  print $tmpfh "m".$quos.$rule.$quos.$mods;
+  print $tmpfh "use bytes; m".$quos.$rule.$quos.$mods;
   close $tmpfh or die "cannot write to $tmpf";
 
   my $perl = $self->get_perl();
@@ -582,8 +582,13 @@
       # we can do both, since we canonicalize to lc.
       if (!$spcs && $item =~ /^EXACT/ && $args =~ /<(.*)>/)
       {
-        $buf .= $1;
-        if (length $1 >= 55 && $buf =~ s/\.\.\.$//) {
+        my $str = $1;
+        $buf .= $str;
+        if ($buf =~ s/\\x\{[0-9a-fA-F]{4,}\}.*$//) {
+          # a high Unicode codepoint, interpreted by perl 5.8.x.  cut and stop
+          $add_candidate->();
+        }
+        if (length $str >= 55 && $buf =~ s/\.\.\.$//) {
           # perl 5.8.x truncates with a "..." here!  cut and stop
           $add_candidate->();
         }

Modified: spamassassin/branches/3.2/t/re_base_extraction.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.2/t/re_base_extraction.t?rev=607239&r1=607238&r2=607239&view=diff
==============================================================================
--- spamassassin/branches/3.2/t/re_base_extraction.t (original)
+++ spamassassin/branches/3.2/t/re_base_extraction.t Fri Dec 28 05:17:26 2007
@@ -22,7 +22,7 @@
   if (-e 't/test_dir') { chdir 't'; } 
   if (-e 'test_dir') { unshift(@INC, '../blib/lib'); }
 
-  plan tests => 112;
+  plan tests => 115;
 
 };
 use lib '../lib';
@@ -176,6 +176,29 @@
 
 # ---------------------------------------------------------------------------
 
+# also not suitable for perl 5.6.x
+($running_perl56) and ok(1);
+($running_perl56) and ok(1);
+($running_perl56) and ok(1);
+(!$running_perl56) and try_extraction ('
+
+  body FOO /foobar\x{e2}\x{82}\x{ac}baz/
+
+', {
+    base_extract => 1,
+    bases_must_be_casei => 0,
+    bases_can_use_alternations => 0,
+    bases_can_use_quantifiers => 0,
+    bases_can_use_char_classes => 0,
+    bases_split_out_alternations => 1
+}, [
+
+  'foobar:FOO',
+
+], [ ]);
+
+# ---------------------------------------------------------------------------
+
 try_extraction ('
     body FOO /(?:Viagra|Valium|Xanax|Soma|Cialis){2}/i
 
@@ -358,8 +381,9 @@
     site_rules_filename => "log/test_default.cf",
     userprefs_filename  => "log/userprefs.cf",
     local_tests_only    => 1,
-    debug             => $debug,
-    dont_copy_prefs   => 1,
+    debug               => $debug,
+    dont_copy_prefs     => 1,
+    base_quiet          => 1,
   });
   ok($sa);