You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2018/12/14 21:05:01 UTC
svn commit: r1848969 [2/2] - in /spamassassin/branches/3.4: ./
lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Conf/
lib/Mail/SpamAssassin/Plugin/ t/
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm Fri Dec 14 21:05:01 2018
@@ -57,7 +57,8 @@ our @EXPORT_OK = qw(&local_tz &base64_de
&exit_status_str &proc_status_ok &am_running_on_windows
&reverse_ip_address &decode_dns_question_entry &touch_file
&get_my_locales &parse_rfc822_date &get_user_groups
- &secure_tmpfile &secure_tmpdir &uri_list_canonicalize);
+ &secure_tmpfile &secure_tmpdir &uri_list_canonicalize
+ &compile_regexp &qr_to_string);
our $AM_TAINTED;
@@ -1097,7 +1098,8 @@ with Perl.
sub first_available_module {
my (@packages) = @_;
foreach my $mod (@packages) {
- if (eval 'require '.$mod.'; 1; ') {
+ next if $mod !~ /^[\w:]+$/; # be paranoid
+ if (eval 'require '.$mod.'; 1;') {
return $mod;
}
}
@@ -1267,6 +1269,8 @@ sub secure_tmpdir {
## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain.
##
+###########################################################################
+
*uri_list_canonify = \&uri_list_canonicalize; # compatibility alias
sub uri_list_canonicalize {
my($redirector_patterns, @uris) = @_;
@@ -1729,6 +1733,157 @@ sub trap_sigalrm_fully {
###########################################################################
+# returns ($compiled_re, $error)
+# if any errors, $compiled_re = undef, $error has string
+# args:
+# - regexp
+# - strip_delimiters (default: 1) (value 2 means, try strip, but don't error)
+# - ignore_always_matching (default: 0)
+sub compile_regexp {
+ my ($re, $strip_delimiters, $ignore_always_matching) = @_;
+ local($1);
+
+ # Do not allow already compiled regexes or other funky refs
+ if (ref($re)) {
+ return (undef, 'ref passed');
+ }
+
+ # try stripping by default
+ $strip_delimiters = 1 if !defined $strip_delimiters;
+
+ # OK, try to remove any normal perl-style regexp delimiters at
+ # the start and end, and modifiers at the end if present,
+ # so we can validate those too.
+ my $origre = $re;
+ my $delim_end = '';
+
+ if ($strip_delimiters >= 1) {
+ # most common delimiter
+ if ($re =~ s{^/}{}) {
+ $delim_end = '/';
+ }
+ # symmetric delimiters
+ elsif ($re =~ s/^(?:m|qr)([\{\(\<\[])//) {
+ ($delim_end = $1) =~ tr/\{\(\<\[/\}\)\>\]/;
+ }
+ # any non-wordchar delimiter, but let's ignore backslash..
+ elsif ($re =~ s/^(?:m|qr)(\W)//) {
+ $delim_end = $1;
+ if ($delim_end eq '\\') {
+ return (undef, 'backslash delimiter not allowed');
+ }
+ }
+ elsif ($strip_delimiters != 2) {
+ return (undef, 'missing regexp delimiters');
+ }
+ }
+
+ # cut end delimiter, mods
+ my $mods;
+ if ($delim_end) {
+ # Ignore e because paranoid
+ if ($re =~ s/\Q${delim_end}\E([a-df-z]*)\z//) {
+ $mods = $1;
+ } else {
+ return (undef, 'invalid end delimiter/mods');
+ }
+ }
+
+ # paranoid check for eval exec (?{foo}), in case someone
+ # actually put "use re 'eval'" somewhere..
+ if ($re =~ /\(\?\??\{/) {
+ return (undef, 'eval (?{}) found');
+ }
+
+ # check unescaped delimiter, but only if it's not symmetric,
+ # those will fp on .{0,10} [xyz] etc, no need for so strict checks
+ # since these regexes don't end up in eval strings anyway
+ if ($delim_end && $delim_end !~ tr/\}\)\]//) {
+ # first we remove all escaped backslashes "\\"
+ my $dbs_stripped = $re;
+ $dbs_stripped =~ s/\\\\//g;
+ # now we can properly check if something is unescaped
+ if ($dbs_stripped =~ /(?<!\\)\Q${delim_end}\E/) {
+ return (undef, "unquoted delimiter '$delim_end' found");
+ }
+ }
+
+ if ($ignore_always_matching) {
+ if (my $err = is_always_matching_regexp($re)) {
+ return (undef, "always matching regexp: $err");
+ }
+ }
+
+ # now prepend the modifiers, in order to check if they're valid
+ if ($mods) {
+ $re = '(?'.$mods.')'.$re;
+ }
+
+ # no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
+ my $compiled_re;
+ $re = untaint_var($re);
+ my $ok = eval {
+ # don't dump deprecated warnings to user STDERR
+ # but die on any other warning for safety?
+ local $SIG{__WARN__} = sub {
+ if ($_[0] !~ /deprecated/i) {
+ die "$_[0]\n";
+ }
+ };
+ $compiled_re = qr/$re/; 1;
+ };
+ if ($ok && ref($compiled_re) eq 'Regexp') {
+ #$origre = untaint_var($origre);
+ #dbg("config: accepted regex '%s' => '%s'", $origre, $compiled_re);
+ return ($compiled_re, '');
+ } else {
+ my $err = $@ ne '' ? $@ : "errno=$!"; chomp $err;
+ $err =~ s/ at .*? line \d.*$//;
+ return (undef, $err);
+ }
+}
+
+sub is_always_matching_regexp {
+ my ($re) = @_;
+
+ if ($re eq '') {
+ return "empty";
+ }
+ elsif ($re =~ /(?<!\\)\|\|/) {
+ return "contains '||'";
+ }
+ elsif ($re =~ /^\|/) {
+ return "starts with '|'";
+ }
+ elsif ($re =~ /\|(?<!\\\|)$/) {
+ return "ends with '|'";
+ }
+
+ return undef;
+}
+
+# convert compiled regexp (?^i:foo) to string (?i)foo
+sub qr_to_string {
+ my ($re) = @_;
+
+ return undef unless ref($re) eq 'Regexp';
+ $re = "".$re; # stringify
+
+ local($1);
+ $re =~ s/^\(\?\^([a-z]*)://;
+ my $mods = $1;
+ $re =~ s/\)\z//;
+
+ return ($mods ? "(?$mods)$re" : $re);
+}
+
+###########################################################################
+
+###
+### regexp_remove_delimiters and make_qr DEPRECATED, to be removed
+### compile_regexp() should be used everywhere
+###
+
# Removes any normal perl-style regexp delimiters at
# the start and end, and modifiers at the end (if present).
# If modifiers are found, they are inserted into the pattern using
@@ -1737,27 +1892,33 @@ sub trap_sigalrm_fully {
sub regexp_remove_delimiters {
my ($re) = @_;
+ warn("deprecated Util regexp_remove_delimiters() called\n");
+
my $delim;
if (!defined $re || $re eq '') {
- warn "cannot remove delimiters from null regexp";
- return; # invalid
+ return undef;
}
- elsif ($re =~ s/^m\{//) { # m{foo/bar}
+ elsif ($re =~ s/^m?\{//) { # m{foo/bar}
$delim = '}';
}
- elsif ($re =~ s/^m\(//) { # m(foo/bar)
+ elsif ($re =~ s/^m?\[//) { # m[foo/bar]
+ $delim = ']';
+ }
+ elsif ($re =~ s/^m?\(//) { # m(foo/bar)
$delim = ')';
}
- elsif ($re =~ s/^m<//) { # m<foo/bar>
+ elsif ($re =~ s/^m?<//) { # m<foo/bar>
$delim = '>';
}
- elsif ($re =~ s/^m(\W)//) { # m#foo/bar#
+ elsif ($re =~ s/^m?(\W)//) { # m#foo/bar#
$delim = $1;
} else { # /foo\/bar/ or !foo/bar!
- $re =~ s/^(\W)//; $delim = $1;
+ return undef; # invalid
}
- $re =~ s/\Q${delim}\E([imsx]*)$// or warn "unbalanced re: $re";
+ if ($re !~ s/\Q${delim}\E([imsx]*)$//) {
+ return undef;
+ }
my $mods = $1;
if ($mods) {
@@ -1771,8 +1932,17 @@ sub regexp_remove_delimiters {
sub make_qr {
my ($re) = @_;
+
+ warn("deprecated Util make_qr() called\n");
+
$re = regexp_remove_delimiters($re);
- return qr/$re/;
+ return undef if !defined $re || $re eq '';
+ my $compiled_re;
+ if (eval { $compiled_re = qr/$re/; 1; } && ref($compiled_re) eq 'Regexp') {
+ return $compiled_re;
+ } else {
+ return undef;
+ }
}
###########################################################################
Modified: spamassassin/branches/3.4/t/dnsbl.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/dnsbl.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/dnsbl.t (original)
+++ spamassassin/branches/3.4/t/dnsbl.t Fri Dec 14 21:05:01 2018
@@ -7,7 +7,7 @@ use Test::More;
plan skip_all => "Long running tests disabled" unless conf_bool('run_long_tests');
plan skip_all => "Net tests disabled" unless conf_bool('run_net_tests');
plan skip_all => "Can't use Net::DNS Safely" unless can_use_net_dns_safely();
-plan tests => 23;
+plan tests => 17;
# ---------------------------------------------------------------------------
# bind configuration currently used to support this test
@@ -54,7 +54,6 @@ EOF
q{ <dns:14.35.17.212.dnsbltest.spamassassin.org> [127.0.0.1] } => 'P_4',
q{ <dns:226.149.120.193.dnsbltest.spamassassin.org> [127.0.0.1] } => 'P_5',
q{ <dns:example.com.dnsbltest.spamassassin.org> [127.0.0.2] } => 'P_6',
- q{ <dns:134.88.73.210.sb.dnsbltest.spamassassin.org?type=TXT> } => 'P_7',
q{,DNSBL_TEST_TOP,} => 'P_8',
q{,DNSBL_TEST_WHITELIST,} => 'P_9',
q{,DNSBL_TEST_DYNAMIC,} => 'P_10',
@@ -63,16 +62,11 @@ EOF
q{,DNSBL_TXT_TOP,} => 'P_13',
q{,DNSBL_TXT_RE,} => 'P_14',
q{,DNSBL_RHS,} => 'P_15',
- q{,DNSBL_SB_TIME,} => 'P_16',
- q{,DNSBL_SB_FLOAT,} => 'P_17',
- q{,DNSBL_SB_STR,} => 'P_18',
);
%anti_patterns = (
q{,DNSBL_TEST_MISS,} => 'P_19',
q{,DNSBL_TXT_MISS,} => 'P_20',
- q{,DNSBL_SB_UNDEF,} => 'P_21',
- q{,DNSBL_SB_MISS,} => 'P_22',
q{ launching DNS A query for 14.35.17.212.untrusted.dnsbltest.spamassassin.org. } => 'untrusted',
);
@@ -136,28 +130,6 @@ header DNSBL_RHS eval:check_rbl_from_hos
describe DNSBL_RHS DNSBL RHS match
tflags DNSBL_RHS net
-header __TEST_SENDERBASE eval:check_rbl_txt('sb', 'sb.dnsbltest.spamassassin.org.')
-tflags __TEST_SENDERBASE net
-
-header DNSBL_SB_TIME eval:check_rbl_sub('sb', 'sb:S6 == 1060085863 && S6 < time')
-describe DNSBL_SB_TIME DNSBL SenderBase time
-tflags DNSBL_SB_TIME net
-
-header DNSBL_SB_FLOAT eval:check_rbl_sub('sb', 'sb:S3 > 7.0 && S3 < 7.2')
-describe DNSBL_SB_FLOAT DNSBL SenderBase floating point
-tflags DNSBL_SB_FLOAT net
-
-header DNSBL_SB_STR eval:check_rbl_sub('sb', 'sb:S1 eq \"Spammer Networks\" && S49 !~ /Y/ && index(S21, \".com\") > 0')
-describe DNSBL_SB_STR DNSBL SenderBase strings
-tflags DNSBL_SB_STR net
-
-header DNSBL_SB_UNDEF eval:check_rbl_sub('sb', 'sb:S98 =~ /foo/ && S99 > 10')
-describe DNSBL_SB_UNDEF DNSBL SenderBase undefined
-tflags DNSBL_SB_UNDEF net
-
-header DNSBL_SB_MISS eval:check_rbl_sub('sb', 'sb:S2 < 3.0')
-describe DNSBL_SB_MISS DNSBL SenderBase miss
-tflags DNSBL_SB_MISS net
");
# The -D clobbers test performance but some patterns & antipatterns depend on debug output
Modified: spamassassin/branches/3.4/t/if_can.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/if_can.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/if_can.t (original)
+++ spamassassin/branches/3.4/t/if_can.t Fri Dec 14 21:05:01 2018
@@ -2,7 +2,7 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("if_can");
-use Test::More tests => 13;
+use Test::More tests => 16;
# ---------------------------------------------------------------------------
@@ -16,6 +16,9 @@ use Test::More tests => 13;
q{ SHOULD_BE_CALLED5 }, 'should_be_called5',
q{ SHOULD_BE_CALLED6 }, 'should_be_called6',
q{ SHOULD_BE_CALLED7 }, 'should_be_called7',
+ q{ SHOULD_BE_CALLED8 }, 'should_be_called8',
+ q{ SHOULD_BE_CALLED9 }, 'should_be_called9',
+ q{ SHOULD_BE_CALLED10 }, 'should_be_called10',
);
%anti_patterns = (
@@ -51,6 +54,15 @@ tstlocalrules (q{
if (!can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_nosuch))
body SHOULD_BE_CALLED7 /./
endif
+ if can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_true) && version > 0.00000
+ body SHOULD_BE_CALLED8 /./
+ endif
+ if !can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_false ) && !(! version > 0.00000)
+ body SHOULD_BE_CALLED9 /./
+ endif
+ if has(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_true) && (!can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_nosuch))
+ body SHOULD_BE_CALLED10 /./
+ endif
if !has(Mail::SpamAssassin::Plugin::Test::check_test_plugin)
body SHOULD_NOT_BE_CALLED1 /./
Modified: spamassassin/branches/3.4/t/mimeheader.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/mimeheader.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/mimeheader.t (original)
+++ spamassassin/branches/3.4/t/mimeheader.t Fri Dec 14 21:05:01 2018
@@ -2,7 +2,7 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("mimeheader");
-use Test::More tests => 4;
+use Test::More tests => 6;
$ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the patterns to work
@@ -14,18 +14,33 @@ $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C';
q{ MIMEHEADER_TEST2 }, q{ test2 },
q{ MATCH_NL_NONRAW }, q{ match_nl_nonraw },
q{ MATCH_NL_RAW }, q{ match_nl_raw },
+ q{ MIMEHEADER_FOUND }, q{ unset_found },
);
+%anti_patterns = (
+
+ q{ MIMEHEADER_NOTFOUND }, q{ unset_notfound },
+
+);
+
+tstpre(q{
+
+ loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
+
+});
+
tstprefs (q{
- # loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
mimeheader MIMEHEADER_TEST1 content-type =~ /application\/msword/
mimeheader MIMEHEADER_TEST2 content-type =~ m!APPLICATION/MSWORD!i
mimeheader MATCH_NL_NONRAW Content-Type =~ /msword; name/
mimeheader MATCH_NL_RAW Content-Type:raw =~ /msword;\n\tname/
+ mimeheader MIMEHEADER_NOTFOUND xyzzy =~ /foobar/
+ mimeheader MIMEHEADER_FOUND xyzzy =~ /foobar/ [if-unset: xyzfoobarxyz]
+
});
sarun ("-L -t < data/nice/004", \&patterns_run_cb);
Modified: spamassassin/branches/3.4/t/regexp_valid.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/regexp_valid.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/regexp_valid.t (original)
+++ spamassassin/branches/3.4/t/regexp_valid.t Fri Dec 14 21:05:01 2018
@@ -18,55 +18,34 @@ if (-e 'test_dir') { # runnin
use strict;
use lib '.'; use lib 't';
use SATest; sa_t_init("regexp_valid");
+use Mail::SpamAssassin::Util qw(compile_regexp);
-use Test::More tests => 24;
-
-# initialize SpamAssassin
-use Mail::SpamAssassin;
-my $sa = create_saobj({'dont_copy_prefs' => 1});
-$sa->init(0); # parse rules
-
-
-# make a _copy_ of the STDERR file descriptor
-# (so we can restore it after redirecting it)
-open(OLDERR, ">&STDERR") || die "Cannot copy STDERR file handle";
-
-# create a file descriptior for logging STDERR
-# (we do not want warnings for regexps we know are invalid)
-my $fh = IO::File->new_tmpfile();
-open(LOGERR, ">&".fileno($fh)) || die "Cannot create LOGERR temp file";
-
-# quiet "used only once" warnings
-1 if *OLDERR;
-1 if *LOGERR;
-
+use Test::More tests => 41;
+my $showerr;
sub tryone {
- my $re = shift;
- return $sa->{conf}->{parser}->is_regexp_valid('test', $re);
+ my ($re, $strip) = @_;
+ $strip = 1 if !defined $strip;
+ my ($rec, $err) = compile_regexp($re, $strip, 1);
+ if (!$rec && $showerr) { print STDERR "invalid regex '$re': $err\n"; }
+ return $rec;
}
# test valid regexps with this sub
sub goodone {
- my $re = shift;
- open(STDERR, ">&=OLDERR") || die "Cannot reopen STDERR";
- return tryone $re;
+ my ($re, $strip) = @_;
+ $showerr = 1;
+ return tryone($re, $strip);
}
# test invalid regexps with this sub
sub badone {
- my $re = shift;
- open(STDERR, ">&=LOGERR") || die "Cannot reopen STDERR (for logging)";
- return !tryone $re;
+ my ($re, $strip) = @_;
+ $showerr = 0;
+ return !tryone($re, $strip);
}
-ok goodone qr/foo bar/;
-ok goodone qr/foo bar/i;
-ok goodone qr/foo bar/is;
-ok goodone qr/foo bar/im;
-ok goodone qr!foo bar!im;
-
ok goodone 'qr/foo bar/';
ok goodone 'qr/foo bar/im';
ok goodone 'qr!foo bar!';
@@ -80,14 +59,38 @@ ok goodone 'm{foo bar}is';
ok goodone 'm(foo bar)is';
ok goodone 'm<foo bar>is';
-ok goodone 'foo bar';
-ok goodone 'foo/bar';
-ok badone 'foo(bar';
-ok badone 'foo(?{1})bar';
+ok goodone 'foo bar', 0;
+ok goodone 'foo/bar', 0;
+ok badone 'foo(bar', 0;
+ok badone 'foo(?{1})bar';
+ok badone 'foo(??{1})bar';
ok badone '/foo(?{1})bar/';
+ok badone '/foo(??{1})bar/';
ok badone 'm!foo(?{1})bar!';
-# ok badone '/test//'; # removed for bug 4700
-ok goodone '.*';
+
+ok goodone '/test\//';
+ok badone '/test//'; # removed for bug 4700 - and back from 7648
+ok badone 'm!test!xyz!i';
+ok badone '//';
+ok badone 'm!|foo!';
+ok goodone 'm!\|foo!';
+ok badone 'm{bar||y}';
+
+ok goodone 'm{test}}'; # it's actually bad, but no way to parse this with simple code
+ok goodone 'm}test}}'; # it's actually bad, but no way to parse this with simple code
+ok goodone 'm{test{}'; # it's good even though perl warns unescaped { is deprecated
+ok goodone 'm}test{}';
+ok goodone 'm{test.{0,10}}';
+ok goodone 'm}test.{0,10}}';
+ok goodone 'm[foo[bar]]';
+ok badone 'm[foo[bar\]]';
+ok goodone 'm(foo(?:bar)x)';
+ok badone 'm(foo\(?:bar)x)';
+ok goodone 'm/test # comment/x';
+ok badone 'm/test # comm/ent/x'; # well you shouldn't use comments anyway
+ok goodone 'm[test # \] foo []x';
+
+ok goodone '.*', 0;
ok goodone 'm*<a[^<]{0,60} onMouseMove=(?:3D)?"window.status=(?:3D)?\'https?://*';
Modified: spamassassin/branches/3.4/t/stop_always_matching_regexps.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/stop_always_matching_regexps.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/stop_always_matching_regexps.t (original)
+++ spamassassin/branches/3.4/t/stop_always_matching_regexps.t Fri Dec 14 21:05:01 2018
@@ -13,20 +13,18 @@ BEGIN {
use lib '.'; use lib 't';
use SATest; sa_t_init("stop_always_matching_regexps");
-use Test::More tests => 13;
+use Test::More tests => 12;
# ---------------------------------------------------------------------------
use strict;
require Mail::SpamAssassin;
-
-my $sa = create_saobj({'dont_copy_prefs' => 1});
-$sa->init(0);
-ok($sa);
+use Mail::SpamAssassin::Util qw(compile_regexp);
sub is_caught {
my ($re) = @_;
- return $sa->{conf}->{parser}->is_always_matching_regexp($re, $re);
+ my ($rec, $err) = compile_regexp($re, 0, 1);
+ return !$rec;
}
ok !is_caught 'foo|bar';