You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/21 06:21:56 UTC
svn commit: r1901093 - in /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin: Check.pm OneLineBodyRuleType.pm
Author: hege
Date: Sat May 21 06:21:56 2022
New Revision: 1901093
URL: http://svn.apache.org/viewvc?rev=1901093&view=rev
Log:
Bug 7992 - Capturing and reusing strings for matching across rules
- Check %- right after regex matching, to prevent got_hit or anything else potentially messing with it in the future
- Save all matches on tflags multiple rules
- Remove duplicate values from matches/tags
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1901093&r1=1901092&r2=1901093&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm Sat May 21 06:21:56 2022
@@ -555,6 +555,7 @@ package $package_name;
sub $chunk_methodname {
my \$self = shift;
my \$hits = 0;
+ my \%captures;
EOT
$evalstr .= ' '.$_ for @{$self->{evalstr_chunk_prefix}};
$self->{evalstr} = $evalstr;
@@ -737,13 +738,14 @@ sub do_head_tests {
'dbg("rules-all: running header rule %s", q{'.$rulename.'});' : '').'
$self->rule_ready(q{'.$rulename.'}, 1);
'.($op_infix ? '$test_qr = $qrptr->{q{'.$rulename.'}};' : '').'
- '.$self->capture_rules_code($conf, $rulename).'
+ '.$self->capture_rules_replace($conf, $rulename).'
'.$posline.'
'.$self->hash_line_for_rule($pms, $rulename).'
'.$ifwhile.' ('.$expr.') {
+ '.($op_infix ? $self->capture_plugin_code() : '').'
$self->got_hit(q{'.$rulename.'}, "", ruletype => "header");
'.$self->hit_rule_plugin_code($pms, $rulename, "header", "",
- $matching_string_unavailable, !$op_infix).'
+ $matching_string_unavailable).'
'.$whlast.'
}
'.$self->ran_rule_plugin_code($rulename, "header").'
@@ -776,7 +778,7 @@ sub do_body_tests {
dbg("rules-all: running body rule %s", q{'.$rulename.'});
';
}
- $sub .= $self->capture_rules_code($conf, $rulename);
+ $sub .= $self->capture_rules_replace($conf, $rulename);
my $nosubject = ($conf->{tflags}->{$rulename}||'') =~ /\bnosubject\b/;
if ($nosubject) {
$sub .= '
@@ -802,6 +804,7 @@ sub do_body_tests {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
while ($l =~ /$test_qr/gop) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
'. $self->hit_rule_plugin_code($pms, $rulename, "body", "") . '
'. ($max? 'last body_'.$loopid.' if ++$hits >= '.$max.';' : '') .'
@@ -823,6 +826,7 @@ sub do_body_tests {
$sub .= '
'.$self->hash_line_for_rule($pms, $rulename).'
if ($l =~ /$test_qr/op) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
'. $self->hit_rule_plugin_code($pms, $rulename, "body", "last") .'
}
@@ -866,7 +870,7 @@ sub do_uri_tests {
dbg("rules-all: running uri rule %s", q{'.$rulename.'});
';
}
- $sub .= $self->capture_rules_code($conf, $rulename);
+ $sub .= $self->capture_rules_replace($conf, $rulename);
if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/) {
$loopid++;
my ($max) = $conf->{tflags}->{$rulename} =~ /\bmaxhits=(\d+)\b/;
@@ -877,6 +881,7 @@ sub do_uri_tests {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
while ($l =~ /$test_qr/gop) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
'. $self->hit_rule_plugin_code($pms, $rulename, "uri", "") . '
'. ($max? 'last uri_'.$loopid.' if ++$hits >= '.$max.';' : '') .'
@@ -888,6 +893,7 @@ sub do_uri_tests {
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
if ($l =~ /$test_qr/op) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
'. $self->hit_rule_plugin_code($pms, $rulename, "uri", "last") .'
}
@@ -927,7 +933,7 @@ sub do_rawbody_tests {
dbg("rules-all: running rawbody rule %s", q{'.$rulename.'});
';
}
- $sub .= $self->capture_rules_code($conf, $rulename);
+ $sub .= $self->capture_rules_replace($conf, $rulename);
if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
{
# support multiple matches
@@ -940,6 +946,7 @@ sub do_rawbody_tests {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
while ($l =~ /$test_qr/gop) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
'. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "") . '
'. ($max? 'last rawbody_'.$loopid.' if ++$hits >= '.$max.';' : '') .'
@@ -952,6 +959,7 @@ sub do_rawbody_tests {
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
if ($l =~ /$test_qr/op) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
'. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "last") . '
}
@@ -1006,9 +1014,10 @@ sub do_full_tests {
pos $$fullmsgref = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
dbg("rules-all: running full rule %s", q{'.$rulename.'});
- '.$self->capture_rules_code($conf, $rulename).'
+ '.$self->capture_rules_replace($conf, $rulename).'
$hits = 0;
while ($$fullmsgref =~ /$test_qr/gp) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "FULL: ", ruletype => "full");
'. $self->hit_rule_plugin_code($pms, $rulename, "full", "last") . '
last if ++$hits >= '.$max.';
@@ -1310,14 +1319,24 @@ sub start_rules_plugin_code {
return $evalstr;
}
+sub capture_plugin_code {
+ my ($self) = @_;
+
+ # Save named captures for regex template rules, tags will be set in
+ # ran_rule_plugin_code to allow tflags multiple to save all
+ return '
+ if (%-) {
+ foreach my $cname (keys %-) {
+ push @{$captures{$cname}}, grep { $_ ne "" } @{$-{$cname}};
+ }
+ }
+ ';
+}
+
sub hit_rule_plugin_code {
my ($self, $pms, $rulename, $ruletype, $loop_break_directive,
- $matching_string_unavailable, $no_capture) = @_;
+ $matching_string_unavailable) = @_;
- # note: keep this in 'single quotes' to avoid the $ & performance hit,
- # unless specifically requested by the caller. Also split the
- # two chars, just to be paranoid and ensure that a buggy perl interp
- # doesn't impose that hit anyway (just in case)
my $match;
if ($matching_string_unavailable) {
$match = '"<YES>"'; # nothing better to report, match is not set by this rule
@@ -1327,59 +1346,55 @@ sub hit_rule_plugin_code {
$match = '(defined ${^MATCH} ? ${^MATCH} : "<negative match>")';
}
- my $debug_code = '';
+ my $code = '';
if (exists($pms->{should_log_rule_hits})) {
- $debug_code = '
+ $code .= '
dbg("rules: ran '.$ruletype.' rule '.$rulename.' ======> got hit: \"" . '.
$match.' . "\"");
';
}
- my $save_hits_code = '';
if ($pms->{save_pattern_hits}) {
- $save_hits_code = '
+ $code .= '
$self->{pattern_hits}->{q{'.$rulename.'}} = '.$match.';
';
}
- # Save named captures for regex template rules
- my $capture_code = '';
- if (!$no_capture) {
- $capture_code = '
- foreach my $cname (keys %-) {
- my @cvals = grep { $_ ne "" } @{$-{$cname}};
- foreach my $cval (@cvals) {
- $self->{capture_values}->{$cname}->{$cval} = 1;
- }
- $self->set_tag($cname, @cvals == 1 ? $cvals[0] : \@cvals);
- }
- ';
- }
-
# if we're not running "tflags multiple", break out of the matching
# loop this way
- my $multiple_code = '';
if ($loop_break_directive &&
($pms->{conf}->{tflags}->{$rulename}||'') !~ /\bmultiple\b/) {
- $multiple_code = $loop_break_directive.';';
+ $code .= $loop_break_directive.';';
}
- return $debug_code.$save_hits_code.$capture_code.$multiple_code;
+ return $code;
}
sub ran_rule_plugin_code {
my ($self, $rulename, $ruletype) = @_;
- return '' unless $self->{main}->have_plugin("ran_rule");
+ # Set tags from captured values
+ my $code = '
+ if (%captures) {
+ foreach my $cname (keys %captures) {
+ my @cvals = do { my %seen; grep { !$seen{$_}++ } @{$captures{$cname}} };
+ $self->{capture_values}->{$cname} = \@cvals;
+ $self->set_tag($cname, @cvals == 1 ? $cvals[0] : \@cvals);
+ }
+ %captures = ();
+ }
+ ';
- # The $self here looks odd, but since we are inserting this into eval'd code it
- # needs to be $self which in that case is actually the PerMsgStatus object
- return '
+ if ($self->{main}->have_plugin("ran_rule")) {
+ $code .= '
$self->{main}->call_plugins ("ran_rule", { permsgstatus => $self, rulename => \''.$rulename.'\', ruletype => \''.$ruletype.'\' });
- ';
+ ';
+ }
+
+ return $code;
}
-sub capture_rules_code {
+sub capture_rules_replace {
my ($self, $conf, $rulename) = @_;
return '' unless exists $conf->{capture_rules}->{$rulename};
@@ -1388,7 +1403,7 @@ sub capture_rules_code {
foreach my $cname (keys %{$self->{conf}->{capture_rules}->{q{'.$rulename.'}}}) {
if (exists $self->{capture_values}->{$cname}) {
my $cval = "(?:".join("|", map { quotemeta($_) }
- keys %{$self->{capture_values}->{$cname}}).")";
+ @{$self->{capture_values}->{$cname}}).")";
$test_qr =~ s/"""${cname}"""/$cval/gs;
';
if ($would_log_rules_all) {
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm?rev=1901093&r1=1901092&r2=1901093&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm Sat May 21 06:21:56 2022
@@ -107,6 +107,7 @@ sub do_one_line_body_tests {
my ($self, $line) = @_;
my $qrptr = $self->{main}->{conf}->{test_qrs};
my $hitsptr = $self->{tests_already_hit};
+ my %captures;
';
if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
@@ -137,6 +138,7 @@ sub do_one_line_body_tests {
$sub .= '
'.$self->hash_line_for_rule($pms, $rulename).'
if ($line =~ /$qrptr->{q{'.$rulename.'}}/op) {
+ '.$self->capture_plugin_code().'
$self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "one_line_body");
'. $self->hit_rule_plugin_code($pms, $rulename, "one_line_body", "return 1") . '
}
@@ -148,6 +150,7 @@ sub do_one_line_body_tests {
$sub .= '
$self->rule_ready(q{'.$rulename.'}, 1);
';
+ $sub .= $self->ran_rule_plugin_code($rulename, "one_line_body");
return if ($opts{doing_user_rules} &&
!$self->is_user_rule_sub($rulename.'_one_line_body_test'));