You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/21 08:51:57 UTC
svn commit: r1901096 - in /spamassassin/trunk: ./ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Plugin/ t/
Author: hege
Date: Sat May 21 08:51:57 2022
New Revision: 1901096
URL: http://svn.apache.org/viewvc?rev=1901096&view=rev
Log:
- Named capture cleanups, add tests, new PMS/set_captures, Parser/parse_captures functions (Bug 7992)
- MIMEHeader: support named regex captures, add tflags multiple support, improve tests
Added:
spamassassin/trunk/t/regexp_named_capture.t (with props)
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
spamassassin/trunk/t/mimeheader.t
spamassassin/trunk/t/testrules.yml
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Sat May 21 08:51:57 2022
@@ -553,6 +553,7 @@ t/re_base_extraction.t
t/recips.t
t/recreate.t
t/recursion.t
+t/regexp_named_capture.t
t/regexp_valid.t
t/relative_scores.t
t/relaycountry.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Sat May 21 08:51:57 2022
@@ -1338,18 +1338,7 @@ sub add_test {
return;
}
$conf->{test_qrs}->{$name} = $rec;
- # Check for named regex capture templates
- if (index($rec, '"""') >= 0) {
- local($1);
- while ($rec =~ /"""(\w+)"""/g) {
- $conf->{capture_rules}->{$name}->{$1} = 1;
- }
- }
- # Make rules with captures run before anything else
- if ($rec =~ /\(\?[<']\w/) {
- dbg("config: adjusting regex capture rule $name priority to -10000");
- $conf->{priority}->{$name} = -10000;
- }
+ $self->parse_captures($name, $rec);
}
elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS)
{
@@ -1398,17 +1387,7 @@ sub add_test {
$conf->{test_qrs}->{$name} = $rec;
$conf->{test_opt_header}->{$name} = $hdr;
$conf->{test_opt_neg}->{$name} = 1 if $op eq '!~';
- # Check for named regex capture templates
- if (index($rec, '"""') >= 0) {
- while ($rec =~ /"""(\w+)"""/g) {
- $conf->{capture_rules}->{$name}->{$1} = 1;
- }
- }
- # Make rules with captures run before anything else
- if ($rec =~ /\(\?[<']\w/) {
- dbg("config: adjusting regex capture rule $name priority to -10000");
- $conf->{priority}->{$name} = -10000;
- }
+ $self->parse_captures($name, $rec);
}
}
elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS)
@@ -1533,6 +1512,23 @@ sub is_meta_valid {
return 0;
}
+# Record named-capture metadata for a rule's regex.
+#
+# Arguments:
+#   $name - rule name, used as the key into the conf hashes below
+#   $re   - the rule's regex; only its string form is inspected here
+#
+# Side effects on $self->{conf}:
+#   capture_rules->{$name}->{TAG} = 1 for every """TAG""" template in $re
+#   priority->{$name} = -10000 when $re defines a named capture group
+#
+# Returns nothing useful.
+sub parse_captures {
+  my ($self, $name, $re) = @_;
+
+  # Check for named regex capture templates
+  if (index($re, '"""') >= 0) {
+    local($1);
+    while ($re =~ /"""([A-Z][A-Z0-9_]*)"""/g) {
+      $self->{conf}->{capture_rules}->{$name}->{$1} = 1;
+    }
+  }
+  # Make rules with captures run before anything else.  Perl accepts
+  # (?<NAME>...), (?'NAME'...) and the Python-style (?P<NAME>...), so all
+  # three spellings are recognised.  Requiring an uppercase first letter
+  # keeps look-behinds such as (?<= and (?<! from matching.
+  if ($re =~ /\(\?P?[<'][A-Z]/) {
+    dbg("config: adjusting regex capture rule $name priority to -10000");
+    $self->{conf}->{priority}->{$name} = -10000;
+  }
+  return;
+}
+
# Deprecated functions, leave just in case..
sub is_delimited_regexp_valid {
my ($self, $rule, $re) = @_;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat May 21 08:51:57 2022
@@ -3635,6 +3635,19 @@ sub all_to_addrs {
###########################################################################
+# Store named regex capture results on the message status object and
+# publish them as tags.
+#
+# $captures is a hash ref (as built from %-) mapping capture name to an
+# array ref of captured values.  For each name the values are de-duplicated
+# in first-seen order, saved under $self->{capture_values}->{NAME}, and
+# passed to set_tag(): as a plain scalar when exactly one value remains,
+# otherwise as an array ref.
+sub set_captures {
+  my ($self, $captures) = @_;
+
+  for my $tag (keys %{$captures}) {
+    # Drop repeated values while keeping the order of first appearance
+    my (%known, @unique);
+    for my $value (@{$captures->{$tag}}) {
+      next if $known{$value}++;
+      push @unique, $value;
+    }
+
+    $self->{capture_values}->{$tag} = \@unique;
+
+    if (@unique == 1) {
+      $self->set_tag($tag, $unique[0]);
+    } else {
+      $self->set_tag($tag, \@unique);
+    }
+  }
+}
+
+###########################################################################
+
1;
__END__
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm Sat May 21 08:51:57 2022
@@ -1376,11 +1376,7 @@ sub ran_rule_plugin_code {
# Set tags from captured values
my $code = '
if (%captures) {
- foreach my $cname (keys %captures) {
- my @cvals = do { my %seen; grep { !$seen{$_}++ } @{$captures{$cname}} };
- $self->{capture_values}->{$cname} = \@cvals;
- $self->set_tag($cname, @cvals == 1 ? $cvals[0] : \@cvals);
- }
+ $self->set_captures(\%captures);
%captures = ();
}
';
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm Sat May 21 08:51:57 2022
@@ -63,7 +63,7 @@ Part 1 = main message headers. Part 2 =
=item tflags NAME_OF_RULE concat
-Concat all headers from all mime parts (possible range applied) into a
+Concatenate all headers from all mime parts (possible range applied) into a
single string for matching. This allows matching headers across multiple
parts with single regex. Normally pattern is tested individually for
different mime parts.
@@ -168,6 +168,9 @@ sub set_config {
$self->{parser}->add_test($rulename, $evalfn."()",
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
+ # Support named regex captures
+ $self->{parser}->parse_captures($rulename, $rec);
+
# evalfn/rulename safe, sanitized by $RULENAME_RE
my $evalcode = '
sub Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn.' {
@@ -195,25 +198,23 @@ sub set_config {
# ---------------------------------------------------------------------------
sub eval_hook_called {
- my ($pobj, $scanner, $rulename) = @_;
+ my ($pobj, $pms, $rulename) = @_;
- my $rule = $scanner->{conf}->{mimeheader_tests}->{$rulename};
+ my $conf = $pms->{conf};
+ my $rule = $conf->{mimeheader_tests}->{$rulename};
my $hdr = $rule->{hdr};
my $negated = $rule->{negated};
- my $if_unset = $rule->{if_unset};
my $pattern = $rule->{pattern};
-
-
- my $getraw;
+ my $tflags = $conf->{tflags}->{$rulename}||'';
+
+ my $getraw = 0;
if ($hdr =~ s/:raw$//) {
$getraw = 1;
- } else {
- $getraw = 0;
}
my $range_min = 0;
my $range_max = 1000;
- if (($scanner->{conf}->{tflags}->{$rulename}||'') =~ /(?:^|\s)range=(\d+)?(-)?(\d+)?(?:\s|$)/) {
+ if ($tflags =~ /(?:^|\s)range=(\d+)?(-)?(\d+)?(?:\s|$)/) {
if (defined $1 && defined $2 && defined $3) {
$range_min = $1;
$range_max = $3;
@@ -229,11 +230,14 @@ sub eval_hook_called {
}
}
- my $concat = ($scanner->{conf}->{tflags}->{$rulename}||'') =~ /\bconcat\b/;
+ my $multiple = $tflags =~ /\bmultiple\b/;
+ my $concat = $tflags =~ /\bconcat\b/;
+ my $maxhits = $tflags =~ /\bmaxhits=(\d+)\b/ ? $1 :
+ $multiple ? 1000 : 1;
my $cval = '';
my $idx = 0;
- foreach my $p ($scanner->{msg}->find_parts(qr/./)) {
+ foreach my $p ($pms->{msg}->find_parts(qr/./)) {
$idx++;
last if $idx > $range_max;
next if $idx < $range_min;
@@ -241,13 +245,12 @@ sub eval_hook_called {
my $val;
if ($hdr eq 'ALL') {
$val = $p->get_all_headers($getraw, 0);
- }
- elsif ($getraw) {
+ } elsif ($getraw) {
$val = $p->raw_header($hdr);
} else {
$val = $p->get_header($hdr);
}
- $val = $if_unset if !defined $val;
+ $val = $rule->{if_unset} if !defined $val;
if ($concat) {
$val .= "\n" unless $val =~ /\n$/;
@@ -255,19 +258,14 @@ sub eval_hook_called {
next;
}
- if ($val =~ /$pattern/p) {
- next if $negated;
- my $match = defined ${^MATCH} ? ${^MATCH} : "<negative match>";
- dbg("mimeheader: ran rule $rulename ======> got hit: \"$match\" (part $idx)");
- return 1;
+ if (_check($pms, $rulename, $val, $pattern, $negated, $maxhits, "part $idx")) {
+ return 0;
}
}
if ($concat) {
- if (!$negated && $cval =~ /$pattern/p) {
- my $match = defined ${^MATCH} ? ${^MATCH} : "<negative match>";
- dbg("mimeheader: ran rule $rulename ======> got hit: \"$match\" (concat)");
- return 1;
+ if (_check($pms, $rulename, $cval, $pattern, $negated, $maxhits, 'concat')) {
+ return 0;
}
}
@@ -279,6 +277,27 @@ sub eval_hook_called {
return 0;
}
+# Match $pattern repeatedly against $value and register every match as a
+# hit for $rulename.
+#
+# Arguments:
+#   $pms      - per-message status object; receives got_hit() and
+#               set_captures() calls
+#   $rulename - name of the rule being evaluated
+#   $value    - header text to match against
+#   $pattern  - regex to apply
+#   $negated  - true for negated rules; a match is then never counted
+#   $maxhits  - stop after this many hits
+#   $desc     - short description of what was matched, used in debug output
+#
+# Returns the number of hits registered (0 when nothing matched or the
+# rule is negated).
+sub _check {
+  my ($pms, $rulename, $value, $pattern, $negated, $maxhits, $desc) = @_;
+
+  my $hits = 0;
+  my %captures;
+  while ($value =~ /$pattern/gp) {
+    last if $negated;
+    if (%-) {
+      foreach my $cname (keys %-) {
+        # %- holds undef for named groups that did not take part in this
+        # match (optional or alternated groups); test defined() first so
+        # the string comparison cannot raise "uninitialized value" warnings
+        push @{$captures{$cname}},
+          grep { defined $_ && $_ ne "" } @{$-{$cname}};
+      }
+    }
+    my $match = defined ${^MATCH} ? ${^MATCH} : "<negative match>";
+    # The hit is registered here, once per match, rather than by the caller
+    $pms->got_hit($rulename, '', ruletype => 'eval');
+    dbg("mimeheader: ran rule $rulename ======> got hit: \"$match\" ($desc)");
+    last if ++$hits >= $maxhits;
+  }
+  $pms->set_captures(\%captures) if %captures;
+  return $hits;
+}
+
# ---------------------------------------------------------------------------
sub finish_tests {
@@ -294,6 +313,8 @@ sub finish_tests {
sub has_all_header { 1 } # Supports ALL header query (Bug 5582)
sub has_tflags_range { 1 } # Supports tflags range=x-y
-sub has_tflags_concat { 1 } # Support tflags concat
+sub has_tflags_concat { 1 } # Supports tflags concat
+sub has_tflags_multiple { 1 } # Supports tflags multiple
+sub has_capture_rules { 1 } # Supports named regex captures (Bug 7992)
1;
Modified: spamassassin/trunk/t/mimeheader.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/mimeheader.t?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/t/mimeheader.t (original)
+++ spamassassin/trunk/t/mimeheader.t Sat May 21 08:51:57 2022
@@ -2,26 +2,32 @@
use lib '.'; use lib 't';
use SATest; sa_t_init("mimeheader");
-use Test::More tests => 12;
+use Test::More tests => 18;
# ---------------------------------------------------------------------------
%patterns = (
- q{ 1.0 MIMEHEADER_TEST1 }, q{ test1 },
- q{ 1.0 MIMEHEADER_TEST2 }, q{ test2 },
- q{ 1.0 MATCH_NL_NONRAW }, q{ match_nl_nonraw },
- q{ 1.0 MATCH_NL_RAW }, q{ match_nl_raw },
- q{ 1.0 MIMEHEADER_FOUND1 }, q{ unset_found },
- q{ 1.0 MIMEHEADER_FOUND2 }, q{ negate_found },
- q{ 1.0 MIMEHEADER_CONCAT1 }, q{ concat1_found },
- q{ 1.0 MIMEHEADER_RANGE1 }, q{ range1_found },
- q{ 1.0 MIMEHEADER_RANGE2 }, q{ range2_found },
- q{ 1.0 MIMEHEADER_RANGE3 }, q{ range3_found },
- q{ 1.0 MIMEHEADER_RANGE4 }, q{ range4_found },
+ q{ 1.0 MIMEHEADER_TEST1 }, '',
+ q{ 1.0 MIMEHEADER_TEST2 }, '',
+ q{ 1.0 MATCH_NL_NONRAW }, '',
+ q{ 1.0 MATCH_NL_RAW }, '',
+ q{ 1.0 MIMEHEADER_FOUND1 }, '',
+ q{ 1.0 MIMEHEADER_FOUND2 }, '',
+ q{ 1.0 MIMEHEADER_CONCAT1 }, '',
+ q{ 1.0 MIMEHEADER_RANGE1 }, '',
+ q{ 1.0 MIMEHEADER_RANGE2 }, '',
+ q{ 1.0 MIMEHEADER_RANGE3 }, '',
+ q{ 1.0 MIMEHEADER_RANGE4 }, '',
+ q{ 1.0 MIMEHEADER_MULTI1 }, '',
+ q{ 1.0 MIMEHEADER_MULTIMETA1 }, '',
+ q{ 1.0 MIMEHEADER_MULTI2 }, '',
+ q{ 1.0 MIMEHEADER_MULTIMETA2 }, '',
+ q{ 1.0 MIMEHEADER_CAPTURE1 }, '',
+ q{/tag MIMECAP1 is now ready, value: text/plain\n/}, '',
);
%anti_patterns = (
- q{ MIMEHEADER_NOTFOUND }, q{ notfound },
+ q{ MIMEHEADER_NOTFOUND }, '',
);
tstprefs (q{
@@ -51,8 +57,19 @@ tstprefs (q{
mimeheader MIMEHEADER_RANGE4 Content-Type =~ /Jurek/
tflags MIMEHEADER_RANGE4 range=-10
+ # multiple
+ mimeheader MIMEHEADER_MULTI1 Content-Type =~ /-[82]/ # iso-8859-2, two matches
+ tflags MIMEHEADER_MULTI1 multiple
+ meta MIMEHEADER_MULTIMETA1 MIMEHEADER_MULTI1 == 2
+ mimeheader MIMEHEADER_MULTI2 ALL =~ /^X-/m # Count X- starting headers
+ tflags MIMEHEADER_MULTI2 multiple
+ meta MIMEHEADER_MULTIMETA2 MIMEHEADER_MULTI2 == 4
+
+ # named regex capture
+ mimeheader MIMEHEADER_CAPTURE1 Content-Type =~ /(?<MIMECAP1>text\/\w+)/
});
-sarun ("-D mimeheader -L -t < data/nice/004 2>&1", \&patterns_run_cb);
+# Debug output for "check" is needed by the tag pattern above
+sarun ("-D check -L -t < data/nice/004 2>&1", \&patterns_run_cb);
ok_all_patterns();
Added: spamassassin/trunk/t/regexp_named_capture.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/regexp_named_capture.t?rev=1901096&view=auto
==============================================================================
--- spamassassin/trunk/t/regexp_named_capture.t (added)
+++ spamassassin/trunk/t/regexp_named_capture.t Sat May 21 08:51:57 2022
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -T
+
+# Exercises named regex captures, (?<NAME>...), in body, rawbody, uri,
+# header and full rules.  The output patterns expected from the run are
+# listed in %patterns.
+
+use lib '.';
+use lib 't';
+use SATest; sa_t_init("regexp_named_capture");
+
+use Test::More;
+plan tests => 10;
+
+# ---------------------------------------------------------------------------
+
+# Every expected pattern maps to an empty string
+%patterns = map { $_ => '' } (
+  # one entry per rule name
+  q{ TEST_CAPTURE_1 },
+  q{ TEST_CAPTURE_2 },
+  q{ TEST_CAPTURE_3 },
+  q{ TEST_CAPTURE_4 },
+  q{ TEST_CAPTURE_5 },
+  # one entry per capture name: the debug line carrying the captured value
+  q{/tag TESTCAP1 is now ready, value: Ximian\n/},
+  q{/tag TESTCAP2 is now ready, value: Ximian\n/},
+  q{/tag TESTCAP3 is now ready, value: gnome.org\n/},
+  q{/tag TESTCAP4 is now ready, value: milkplus\n/},
+  q{/tag TESTCAP5 is now ready, value: release\n/},
+);
+%anti_patterns = ();
+
+tstlocalrules (q{
+  body TEST_CAPTURE_1 /release of (?<TESTCAP1>\w+)/
+  rawbody TEST_CAPTURE_2 /release of (?<TESTCAP2>\w+)/
+  uri TEST_CAPTURE_3 /ftp\.(?<TESTCAP3>[\w.]+)/
+  header TEST_CAPTURE_4 Message-ID =~ /@(?<TESTCAP4>\w+)/
+  full TEST_CAPTURE_5 /X-Spam-Status.* preview (?<TESTCAP5>\w+)/s
+});
+
+sarun ("-D check -L -t < data/nice/001 2>&1", \&patterns_run_cb);
+ok_all_patterns();
+
Propchange: spamassassin/trunk/t/regexp_named_capture.t
------------------------------------------------------------------------------
svn:executable = *
Modified: spamassassin/trunk/t/testrules.yml
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/testrules.yml?rev=1901096&r1=1901095&r2=1901096&view=diff
==============================================================================
--- spamassassin/trunk/t/testrules.yml (original)
+++ spamassassin/trunk/t/testrules.yml Sat May 21 08:51:57 2022
@@ -9,6 +9,7 @@ seq:
- t/uri*.t
- t/get*.t
- t/header*.t
+ - t/regexp*.t
- t/*dns*.t
- t/rule*.t
# tests that are not parallel-ready (will run in isolation)