You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/12/28 14:17:27 UTC
svn commit: r607239 - in /spamassassin/branches/3.2:
lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm t/re_base_extraction.t
Author: jm
Date: Fri Dec 28 05:17:26 2007
New Revision: 607239
URL: http://svn.apache.org/viewvc?rev=607239&view=rev
Log:
bug 5696: cut regexp base strings at Unicode high codepoints, to avoid corruption of patterns containing UTF-8
Modified:
spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
spamassassin/branches/3.2/t/re_base_extraction.t
Modified: spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?rev=607239&r1=607238&r2=607239&view=diff
==============================================================================
--- spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/3.2/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Fri Dec 28 05:17:26 2007
@@ -119,7 +119,7 @@
my $progress;
$self->{show_progress} and $progress = Mail::SpamAssassin::Util::Progress->new({
- total => scalar keys %{$rules},
+ total => (scalar keys %{$rules} || 1),
itemtype => 'rules',
});
@@ -258,7 +258,7 @@
$count = 0;
$self->{show_progress} and $progress = Mail::SpamAssassin::Util::Progress->new({
- total => scalar @good_bases,
+ total => (scalar @good_bases || 1),
itemtype => 'bases',
});
@@ -480,7 +480,7 @@
}
}
}
- print $tmpfh "m".$quos.$rule.$quos.$mods;
+ print $tmpfh "use bytes; m".$quos.$rule.$quos.$mods;
close $tmpfh or die "cannot write to $tmpf";
my $perl = $self->get_perl();
@@ -582,8 +582,13 @@
# we can do both, since we canonicalize to lc.
if (!$spcs && $item =~ /^EXACT/ && $args =~ /<(.*)>/)
{
- $buf .= $1;
- if (length $1 >= 55 && $buf =~ s/\.\.\.$//) {
+ my $str = $1;
+ $buf .= $str;
+ if ($buf =~ s/\\x\{[0-9a-fA-F]{4,}\}.*$//) {
+ # a high Unicode codepoint, interpreted by perl 5.8.x. cut and stop
+ $add_candidate->();
+ }
+ if (length $str >= 55 && $buf =~ s/\.\.\.$//) {
# perl 5.8.x truncates with a "..." here! cut and stop
$add_candidate->();
}
Modified: spamassassin/branches/3.2/t/re_base_extraction.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.2/t/re_base_extraction.t?rev=607239&r1=607238&r2=607239&view=diff
==============================================================================
--- spamassassin/branches/3.2/t/re_base_extraction.t (original)
+++ spamassassin/branches/3.2/t/re_base_extraction.t Fri Dec 28 05:17:26 2007
@@ -22,7 +22,7 @@
if (-e 't/test_dir') { chdir 't'; }
if (-e 'test_dir') { unshift(@INC, '../blib/lib'); }
- plan tests => 112;
+ plan tests => 115;
};
use lib '../lib';
@@ -176,6 +176,29 @@
# ---------------------------------------------------------------------------
+# also not suitable for perl 5.6.x
+($running_perl56) and ok(1);
+($running_perl56) and ok(1);
+($running_perl56) and ok(1);
+(!$running_perl56) and try_extraction ('
+
+ body FOO /foobar\x{e2}\x{82}\x{ac}baz/
+
+', {
+ base_extract => 1,
+ bases_must_be_casei => 0,
+ bases_can_use_alternations => 0,
+ bases_can_use_quantifiers => 0,
+ bases_can_use_char_classes => 0,
+ bases_split_out_alternations => 1
+}, [
+
+ 'foobar:FOO',
+
+], [ ]);
+
+# ---------------------------------------------------------------------------
+
try_extraction ('
body FOO /(?:Viagra|Valium|Xanax|Soma|Cialis){2}/i
@@ -358,8 +381,9 @@
site_rules_filename => "log/test_default.cf",
userprefs_filename => "log/userprefs.cf",
local_tests_only => 1,
- debug => $debug,
- dont_copy_prefs => 1,
+ debug => $debug,
+ dont_copy_prefs => 1,
+ base_quiet => 1,
});
ok($sa);