You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/03/09 14:34:25 UTC
svn commit: r1898791 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm branches/3.4/sa-compile.raw trunk/sa-compile.raw trunk/t/sa_compile.t
Author: hege
Date: Wed Mar 9 14:34:25 2022
New Revision: 1898791
URL: http://svn.apache.org/viewvc?rev=1898791&view=rev
Log:
Fix sa-compile with UTF-8 rules, in many cases rules might not hit at all.
Perlapi says:
"SvPVutf8 is like SvPV, but converts sv to UTF-8 first if not already UTF-8."
So change XS code to use SvPV, since SA body is supposed to be in bytes, *duh*.
Add some more tests.
Also backport to 3.4.
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
spamassassin/branches/3.4/sa-compile.raw
spamassassin/trunk/sa-compile.raw
spamassassin/trunk/t/sa_compile.t
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?rev=1898791&r1=1898790&r2=1898791&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Wed Mar 9 14:34:25 2022
@@ -1132,6 +1132,8 @@ sub fixup_re {
$output =~ s/\*\*BACKSLASH\*\*/\\\\/gs;
if ($fixup_re_test) { print "OUTPUT: $output\n" or die "error writing: $!" }
+
+ utf8::encode($output) if utf8::is_utf8($output); # force octets
return $output;
}
Modified: spamassassin/branches/3.4/sa-compile.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/sa-compile.raw?rev=1898791&r1=1898790&r2=1898791&view=diff
==============================================================================
--- spamassassin/branches/3.4/sa-compile.raw (original)
+++ spamassassin/branches/3.4/sa-compile.raw Wed Mar 9 14:34:25 2022
@@ -560,7 +560,7 @@ scan(psv)
AV *results;
CODE:
- pstart = (unsigned char *) SvPVutf8(psv, plen);
+ pstart = (unsigned char *) SvPV(psv, plen);
pend = pstart + plen;
results = (AV *) sv_2mortal((SV *) newAV());
Modified: spamassassin/trunk/sa-compile.raw
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sa-compile.raw?rev=1898791&r1=1898790&r2=1898791&view=diff
==============================================================================
--- spamassassin/trunk/sa-compile.raw (original)
+++ spamassassin/trunk/sa-compile.raw Wed Mar 9 14:34:25 2022
@@ -560,7 +560,7 @@ scan(psv)
AV *results;
CODE:
- pstart = (unsigned char *) SvPVutf8(psv, plen);
+ pstart = (unsigned char *) SvPV(psv, plen);
pend = pstart + plen;
results = (AV *) sv_2mortal((SV *) newAV());
Modified: spamassassin/trunk/t/sa_compile.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/sa_compile.t?rev=1898791&r1=1898790&r2=1898791&view=diff
==============================================================================
--- spamassassin/trunk/t/sa_compile.t (original)
+++ spamassassin/trunk/t/sa_compile.t Wed Mar 9 14:34:25 2022
@@ -1,5 +1,9 @@
#!/usr/bin/perl -T
+###
+### UTF-8 CONTENT, edit with UTF-8 locale/editor
+###
+
use lib '.'; use lib 't';
$ENV{'TEST_PERL_TAINT'} = 'no'; # inhibit for this test
use SATest; sa_t_init("sa_compile");
@@ -15,7 +19,7 @@ use Test::More;
plan skip_all => "Long running tests disabled" unless conf_bool('run_long_tests');
plan skip_all => "Tests don't work on windows" if $RUNNING_ON_WINDOWS;
plan skip_all => "RE2C isn't new enough" unless re2c_version_new_enough();
-plan tests => 5;
+plan tests => 24;
# -------------------------------------------------------------------
@@ -41,39 +45,74 @@ system_or_die "cd $builddir && mv Mail-S
$scr = "$instdir/$temp_binpath/spamassassin";
$scr_localrules_args = $scr_cf_args = ""; # use the default rules dir, from our "install"
-&set_rules("body FOO /You have been selected to receive/");
+&set_rules('
+body FOO1 /You have been selected to receive/
+body FOO2 /You have bee[n] selected to receive/
+body FOO3 /You have bee(?:xyz|\x6e) selected to receive/
+body FOO4 /./
+body FOO5 /éèæ©/
+body FOO6 /éè(?:xyz|æ©)/
+body FOO7 /\xe9\x87\x91\xe8\x9e\x8d\xe6\xa9\x9f/
+body FOO8 /.\x87(?:\x91|\x00)[\xe8\x00]\x9e\x8d\xe6\xa9\x9f/
+');
# ensure we don't use compiled rules
untaint_system("rm -rf $instdir/var/spamassassin/compiled");
%patterns = (
-
- q{ check: tests=FOO }, 'FOO'
-
+ '/ check: tests=FOO1,FOO2,FOO3,FOO4\n/', 'FOO',
);
-
-print "\nRunning spam checks uncompiled\n";
-ok sarun ("-D -Lt < $cwd/data/spam/001 2>&1", \&patterns_run_cb);
+%anti_patterns = (
+ '/ zoom: able to use /', '',
+);
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 1' < $cwd/data/spam/001 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 0' < $cwd/data/spam/001 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+%patterns = (
+ '/ check: tests=FOO4,FOO5,FOO6,FOO7,FOO8\n/', 'FOO',
+);
+%anti_patterns = (
+ '/ zoom: able to use /', '',
+);
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 1' < $cwd/data/spam/unicode1 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 0' < $cwd/data/spam/unicode1 2>&1", \&patterns_run_cb);
ok_all_patterns();
-
clear_pattern_counters();
# -------------------------------------------------------------------
-print "\nRunning spam checks compiled\n";
untaint_system "rm -rf \$HOME/.spamassassin/sa-compile.cache"; # reset test
-system_or_die "$instdir/$temp_binpath/sa-compile --keep-tmps 2>&1"; # --debug
-%patterns = (
-
- q{ able to use 1/1 'body_0' compiled rules }, 'able-to-use',
- q{ check: tests=FOO }, 'FOO'
-
-);
+system_or_die "TMP=$instdir TMPDIR=$instdir $instdir/$temp_binpath/sa-compile --quiet -p $cwd/$workdir/user.cf --keep-tmps -D 2>$instdir/sa-compile.debug"; # --debug
$scr = "$instdir/$temp_binpath/spamassassin";
$scr_localrules_args = $scr_cf_args = ""; # use the default rules dir, from our "install"
-ok sarun ("-D -Lt < $cwd/data/spam/001 2>&1", \&patterns_run_cb);
+%patterns = (
+ q{ zoom: able to use 5/5 'body_0' compiled rules }, 'able-to-use',
+ '/ check: tests=FOO1,FOO2,FOO3,FOO4\n/', 'FOO',
+);
+%anti_patterns = ();
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 1' < $cwd/data/spam/001 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 0' < $cwd/data/spam/001 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+%patterns = (
+ q{ zoom: able to use 5/5 'body_0' compiled rules }, 'able-to-use',
+ '/ check: tests=FOO4,FOO5,FOO6,FOO7,FOO8\n/', 'FOO',
+);
+%anti_patterns = ();
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 1' < $cwd/data/spam/unicode1 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+clear_pattern_counters();
+ok sarun ("-D all,rules-all -L -t --cf 'normalize_charset 0' < $cwd/data/spam/unicode1 2>&1", \&patterns_run_cb);
ok_all_patterns();
+clear_pattern_counters();
# -------------------------------------------------------------------
@@ -130,13 +169,8 @@ sub set_rules {
open RULES, ">$file"
or die "cannot write $file - $!";
- print RULES qq{
-
- use_bayes 0
-
- $rules
-
- };
+ print RULES "use_bayes 0";
+ print RULES $rules;
close RULES or die;
#Create the dir for the pre file